diff --git a/CMakeLists.txt b/CMakeLists.txt index e0850a84..eb12c8bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,12 +52,12 @@ set(CPACK_PACKAGE_VERSION_MAJOR "${VERSION_MAJOR}") set(CPACK_PACKAGE_VERSION_MINOR "${VERSION_MINOR}") set(CPACK_PACKAGE_VERSION_PATCH "${VERSION_PATCH}") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY - "A high performance BSP communications library" ) + "A high performance BSP communications library" ) set(CPACK_SOURCE_GENERATOR "TGZ" ) set(CPACK_SOURCE_IGNORE_FILES "/\\\\.git/" "/\\\\.svn/" "\\\\.swp$" "/site/" "/build/" "/pclint/" "/junit/" "/ideas/" ) set(CPACK_SOURCE_PACKAGE_FILE_NAME - "LPF-${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}-${VERSION_PACKAGE}") + "LPF-${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}-${VERSION_PACKAGE}") set(CPACK_GENERATOR "RPM") set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") @@ -183,10 +183,29 @@ endif() #enable the hybrid engine if ( LIB_POSIX_THREADS AND LIB_MATH AND LIB_DL AND MPI_FOUND - AND MPI_IS_THREAD_COMPAT AND MPI_IS_NOT_OPENMPI1 - AND ENABLE_IBVERBS ) - list(APPEND ENGINES "hybrid") - set(HYBRID_ENGINE_ENABLED on) + AND MPI_IS_THREAD_COMPAT AND MPI_IS_NOT_OPENMPI1 ) + if( ENABLE_IBVERBS ) + set(LPFLIB_HYBRID_MPI_ENGINE "ibverbs" CACHE STRING + "Choice of MPI engine to use for inter-process communication") + list(APPEND ENGINES "hybrid") + set(HYBRID_ENGINE_ENABLED on) + elseif( MPI_RMA ) + set(LPFLIB_HYBRID_MPI_ENGINE "mpirma" CACHE STRING + "Choice of MPI engine to use for inter-process communication") + list(APPEND ENGINES "hybrid") + set(HYBRID_ENGINE_ENABLED on) + elseif( LIB_MATH AND LIB_DL AND MPI_FOUND ) + set(LPFLIB_HYBRID_MPI_ENGINE "mpimsg" CACHE STRING + "Choice of MPI engine to use for inter-process communication") + list(APPEND ENGINES "hybrid") + set(HYBRID_ENGINE_ENABLED on) + endif() + if( HYBRID_ENGINE_ENABLED ) + message( "Hybrid engine will be built using the ${LPFLIB_HYBRID_MPI_ENGINE} engine" ) + else() + message( "No suitable inter-node communication engine found; " + "hybrid engine will not be built" ) + endif() endif() message( STATUS "The following engines will be built: ${ENGINES}") @@ -209,6 +228,7 @@ endif() # When system is not Linux, enable conditionally compiled blocks if (APPLE) + message( WARNING "LPF compilation on OS X is not regularly tested" ) add_definitions(-DLPF_ON_MACOS=1) endif() @@ -233,8 +253,8 @@ option(LPF_ENABLE_TESTS "Enable unit and API tests. This uses Google Testing and Mocking Framework" OFF) option(GTEST_AGREE_TO_LICENSE - "Does the user agree to the GoogleTest license" - OFF) + "Does the user agree to the GoogleTest license" + OFF) # C++ standard -- Google tests require newer C++ standard than C++11 if (LPF_ENABLE_TESTS) @@ -312,14 +332,43 @@ endfunction(target_compile_flags) # Source set(lpf_cflags) set(lpf_lib_link_flags) -set(lpf_exe_link_flags "-rdynamic") +set(lpf_exe_link_flags) + +# Populate lpf_cflags, lpf_lib_link_flags, lpf_exe_link_flags according to +# (enabled) engine requirements +# - 0) PThreads engine needs nothing special +# - 1) MPI-based engines: +if ( LIB_MATH AND LIB_DL AND MPI_FOUND ) + # -fPIC and -rdynamic are necessary to ensure that symbols can be + # looked up by dlsym which is the mechanism lpf_exec uses to broadcast the + # function that should be executed + set(rdyn_lflag "-rdynamic") + if (APPLE) + # OS X does not support -rdynamic + set(rdyn_lflag "") + endif () + + # include flags: + set( mpi_include_flags ) + string( REPLACE ";" " -I" mpi_include_flags "${MPI_C_INCLUDE_PATH}" ) + set(lpf_cflags "${lpf_cflags} -I${mpi_include_flags} -fPIC") + + # linker flags: + set(lib_lflags "${MPI_C_LINK_FLAGS}") #Note: the core library is already linked with MPI_C_LIBRARIES. + string(REPLACE ";" " " lib_lflags "${lib_lflags}") # So, no need to also link executables with it. + set(lpf_lib_link_flags "${lpf_lib_link_flags} ${lib_lflags} ${rdyn_lflag}") + + # executable linker flags: + set(lpf_exe_link_flags "${lpf_exe_link_flags} ${rdyn_lflag}") +endif () +# ...add requirements from other engines here... # Collating all compile & link flags set(LPF_CORE_COMPILE_FLAGS "${lpf_cflags}" CACHE STRING "Compilation flags for all user code" ) set(LPF_CORE_LIB_LINK_FLAGS "${lpf_lib_link_flags}" CACHE STRING "Flags to link user libraries" ) set(LPF_CORE_EXE_LINK_FLAGS "${lpf_exe_link_flags}" CACHE STRING "Flags to link user executables" ) -# Compiling LPF programmes in the build dir +# Compiling LPF programs in the build dir function( target_link_exe_with_core target ) set(engine "imp") if (ARGV1) @@ -343,10 +392,12 @@ if (LPF_ENABLE_TESTS) message(STATUS "Unit and API tests will be built. This requires CMake version 3.29 or higher, since we use recent features of the GoogleTest package in CMake.") if (NOT GTEST_AGREE_TO_LICENSE) - message(FATAL_ERROR "The user needs to agree with the GoogleTest license to use tests (option GTEST_AGREE_TO_LICENSE=TRUE)") + message(FATAL_ERROR "The user needs to agree with the GoogleTest license to use tests (option GTEST_AGREE_TO_LICENSE=TRUE)") endif() # Enable testing in CMake enable_testing() + include(ProcessorCount) + ProcessorCount(processorCount) find_package(GTest) include(GoogleTest) if(NOT GTest_FOUND) # if not found, download it and pull it in @@ -367,64 +418,84 @@ if (LPF_ENABLE_TESTS) file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/junit) set(test_output "${CMAKE_BINARY_DIR}/junit") - set(MY_TEST_LAUNCHER ${CMAKE_BINARY_DIR}/test_launcher.py) - configure_file( ${CMAKE_SOURCE_DIR}/test_launcher.py ${MY_TEST_LAUNCHER} @ONLY FILE_PERMISSIONS WORLD_EXECUTE OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ) - if( NOT Python3_FOUND ) - find_package( Python3 REQUIRED) - endif() + find_package( Python3 REQUIRED COMPONENTS Interpreter) + set(MY_TEST_LAUNCHER ${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/test_launcher.py) + configure_file( ${CMAKE_SOURCE_DIR}/test_launcher.py.in ${CMAKE_BINARY_DIR}/test_launcher.py @ONLY FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ) # Macro for adding a new GoogleTest test function(add_gtest testName ENGINE debug testSource ) - if ("{$ENGINE}" STREQUAL "") - message(FATAL_ERROR "engine cannot be empty, ever!") - endif() - add_executable(${testName} ${testSource} ${ARGN}) - target_compile_definitions(${testName} PUBLIC LPF_CORE_IMPL_ID=${ENGINE}) - target_compile_definitions(${testName} PUBLIC LPF_CORE_MPI_USES_${ENGINE}) - if (debug) - target_include_directories( ${testName} BEFORE PRIVATE ${CMAKE_SOURCE_DIR}/include/debug ) - target_link_libraries(${testName} lpf_debug lpf_hl_debug GTest::gtest GTest::gtest_main) - else(debug) - target_link_libraries(${testName} GTest::gtest GTest::gtest_main) - endif(debug) - - - # Extract test-specific information from comments of tests - file(READ ${testSource} fileContents) - string(REGEX MATCH "Exit code: ([0-9]+)" _ ${fileContents}) - set(retCode ${CMAKE_MATCH_1}) - string(REGEX MATCH "pre P >= ([0-9]+)" _ ${fileContents}) - set(minProcs ${CMAKE_MATCH_1}) - string(REGEX MATCH "pre P <= ([0-9]+)" _ ${fileContents}) - set(maxProcs ${CMAKE_MATCH_1}) - string(REGEX MATCH "-probe ([0-9]+.[0-9]+)" _ ${fileContents}) - set(lpfProbeSecs ${CMAKE_MATCH_1}) - - target_link_exe_with_core(${testName} ${ENGINE}) - - - if ("${minProcs}" STREQUAL "") - set(minProcs "1") - endif() - if ("${maxProcs}" STREQUAL "") - set(maxProcs "5") - endif() - if ("${lpfProbeSecs}" STREQUAL "") - set(lpfProbeSecs "0.0") - endif() - if ("${retCode}" STREQUAL "") - set(retCode "0") - endif() - - # Most recent approach to Gtests, recommended! - set_property(TARGET ${testName} PROPERTY TEST_LAUNCHER ${MY_TEST_LAUNCHER};-e;${ENGINE};-L;${CMAKE_BINARY_DIR}/lpfrun_build;-p;${minProcs};-P;${maxProcs};-t;${lpfProbeSecs};-R;${retCode}) - gtest_discover_tests(${testName} - TEST_PREFIX ${ENGINE}_ - EXTRA_ARGS --gtest_output=xml:${test_output}/${ENGINE}_${testName} - DISCOVERY_MODE POST_BUILD - DISCOVERY_TIMEOUT 15 - ) + if ("{$ENGINE}" STREQUAL "") + message(FATAL_ERROR "engine cannot be empty, ever!") + endif() + add_executable(${testName} ${testSource} ${ARGN}) + target_compile_definitions(${testName} PUBLIC LPF_CORE_IMPL_ID=${ENGINE}) + target_compile_definitions(${testName} PUBLIC LPF_CORE_MPI_USES_${ENGINE}) + if (debug) + target_include_directories( ${testName} BEFORE PRIVATE ${CMAKE_SOURCE_DIR}/include/debug ) + target_link_libraries(${testName} lpf_debug lpf_hl_debug GTest::gtest GTest::gtest_main) + else(debug) + target_link_libraries(${testName} GTest::gtest GTest::gtest_main) + endif(debug) + + + # Extract test-specific information from comments of tests + file(READ ${testSource} fileContents) + string(REGEX MATCH "Exit code: ([0-9]+)" _ ${fileContents}) + set(retCode ${CMAKE_MATCH_1}) + string(REGEX MATCH "pre P >= ([0-9]+)" _ ${fileContents}) + set(minProcs ${CMAKE_MATCH_1}) + string(REGEX MATCH "pre P <= ([0-9]+)" _ ${fileContents}) + set(maxProcs ${CMAKE_MATCH_1}) + string(REGEX MATCH "-probe ([0-9]+.[0-9]+)" _ ${fileContents}) + set(lpfProbeSecs ${CMAKE_MATCH_1}) + + target_link_exe_with_core(${testName} ${ENGINE}) + + # The "\pre P <= max" comment in a test indicates the desired number of + # maximum LPF processes. If the test does not define a desired number of + # maximum LPF processes, it will be set to 5. + # + # The "\pre P >= min" comment in a test indicates the desired number of + # minimum LPF processes. If the test does not define a desired minimum + # number of LPF processes, it will be set to 1. + # + # Let 'processorCount' be the detected number of processors by the system. + # If this number is smaller than the desider minimum and/or maximum number + # of processes, it overwrites these + # + # Most tests only define a mininum number of desired processes, such as + # "\pre P >= 1". In those cases, the test will execute for the range 1,..,5 + # (including) + + if ("${minProcs}" STREQUAL "") + set(minProcs "1") + endif() + if ("${maxProcs}" STREQUAL "") + set(maxProcs "5") + endif() + # cap min with processorCount, if needed + if ("${minProcs}" GREATER "${processorCount}") + set(minProcs ${processorCount}) + endif() + # cap max with processorCount, if needed + if ("${maxProcs}" GREATER "${processorCount}") + set(maxProcs ${processorCount}) + endif() + if ("${lpfProbeSecs}" STREQUAL "") + set(lpfProbeSecs "0.0") + endif() + if ("${retCode}" STREQUAL "") + set(retCode "0") + endif() + # Most recent approach to Gtests, recommended! + set_property(TARGET ${testName} PROPERTY TEST_LAUNCHER ${MY_TEST_LAUNCHER};--engine;${ENGINE};--parallel_launcher;${CMAKE_BINARY_DIR}/lpfrun_build;--min_process_count;${minProcs};--max_process_count;${maxProcs};--lpf_probe_timer;${lpfProbeSecs};--expected_return_code;${retCode}) + gtest_discover_tests(${testName} + TEST_PREFIX ${ENGINE}_ + EXTRA_ARGS --gtest_output=xml:${test_output}/${ENGINE}_${testName} + DISCOVERY_MODE POST_BUILD + DISCOVERY_TIMEOUT 15 + ) endfunction(add_gtest) @@ -436,10 +507,11 @@ else(LPF_ENABLE_TESTS) endif(LPF_ENABLE_TESTS) +# Main LPF library includes and sources include_directories(include) include_directories(src/common) - add_subdirectory(src) + # Apps add_subdirectory(src/utils) diff --git a/README b/README index 626173c7..26b0300b 100644 --- a/README +++ b/README @@ -38,6 +38,10 @@ Optional MPI engine requires Optional for thread pinning by Pthreads and hybrid engines - hwloc > 1.11 +Optional tests requires + - GNU C++ compiler (C++17 compatible), + - Python 3. + Optional (see --enable-doc) documentation requires - doxygen > 1.5.6, - graphviz, diff --git a/bootstrap.sh b/bootstrap.sh index 60c1ec26..14628772 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -192,7 +192,7 @@ EOF --with-mpiexec=*) mpiexec="${arg#--with-mpiexec=}" - mpi_cmake_flags="${mpi_cmake_flags} -DMPIEXEC=$mpiexec" + mpi_cmake_flags="${mpi_cmake_flags} -DMPIEXEC=$mpiexec -DMPIEXEC_EXECUTABLE=$mpiexec" shift; ;; @@ -288,8 +288,8 @@ ${CMAKE_EXE} -Wno-dev \ -DLPF_HWLOC="${hwloc}" \ $hwloc_found_flag \ $mpi_cmake_flags \ - "$extra_flags" \ - "$perf_flags" \ + ${extra_flags+"$extra_flags"} \ + ${perf_flags+"$perf_flags"} \ "$@" $srcdir \ || { echo FAIL "Failed to configure LPF; Please check your chosen configuration"; exit 1; } diff --git a/cmake/mpi.cmake b/cmake/mpi.cmake index bd7ca9a5..f8d55851 100644 --- a/cmake/mpi.cmake +++ b/cmake/mpi.cmake @@ -170,8 +170,17 @@ try_run( IBVERBS_INIT_RUNS IBVERBS_INIT_COMPILES endif() set(ENABLE_IBVERBS FALSE) -if (LIB_IBVERBS AND NOT IBVERBS_INIT_RUNS STREQUAL "FAILED_TO_RUN") - set(ENABLE_IBVERBS TRUE) +if (LPF_ENABLE_TESTS) + # The Google Test integration requires that tests successfully compiled are + # also runnable + if (LIB_IBVERBS AND NOT IBVERBS_INIT_RUNS STREQUAL "FAILED_TO_RUN") + set(ENABLE_IBVERBS TRUE) + endif() +else() + # Without the aforementioned Google Test requirement, we can safely build + # it and allow the user to deploy the built binaries on IB-enabled nodes. + if (LIB_IBVERBS) + set(ENABLE_IBVERBS TRUE) + endif() endif() - diff --git a/doc/lpf_core.cfg.in b/doc/lpf_core.cfg.in index 0a8de71c..bfb940b2 100644 --- a/doc/lpf_core.cfg.in +++ b/doc/lpf_core.cfg.in @@ -742,8 +742,8 @@ INPUT = @PROJECT_SOURCE_DIR@/include/lpf/core.h \ @PROJECT_SOURCE_DIR@/include/bsp/bsp.h \ @PROJECT_SOURCE_DIR@/include/lpf/hybrid.h \ @PROJECT_SOURCE_DIR@/include/lpf/mpirpc-client.h \ - @PROJECT_SOURCE_DIR@/include/lpf/rpc-client.h - + @PROJECT_SOURCE_DIR@/include/lpf/rpc-client.h \ + @PROJECT_SOURCE_DIR@/include/lpf/abort.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/include/lpf/abort.h b/include/lpf/abort.h new file mode 100644 index 00000000..383a6ab8 --- /dev/null +++ b/include/lpf/abort.h @@ -0,0 +1,152 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LPFLIB_ABORT_H +#define LPFLIB_ABORT_H + +#include "lpf/static_dispatch.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup LPF_EXTENSIONS LPF API extensions + * @{ + * + * \defgroup LPF_ABORT Functionality for aborting LPF applications + * + * If #LPF_HAS_ABORT has a nonzero value, then a call to #lpf_abort from any + * process in a distributed application, will abort the entire application. + * + * \note As with all LPF extensions, it is \em not mandatory for all LPF + * implementations to support this one. + * + * If #LPF_HAS_ABORT has a zero value, then a call to #lpf_abort shall have no + * other effect than it returning #LPF_SUCCESS. + * + * Therefore, + * - LPF implementations that cannot support an abort functionality may still + * provide a valid, albeit trivial, implementation of this extension. + * - LPF applications that aim to rely on #lpf_abort should first ensure that + * #LPF_HAS_ABORT is nonzero. + * + * \warning Portable LPF implementations best not rely on #lpf_abort at all. + * Although sometimes unavoidable, the recommendation is to avoid the + * use of this extension as best as possible. + * + * \note One case where #lpf_abort is absolutely required is for \em testing an + * LPF debug layer. Such a layer should detect erroneous usage, report it, + * but then typically cannot continue execution. In this case, relying on + * the standard abort or exit functionalities to terminate the process the + * error was detected at, typically results in implementation-specific + * (i.e., undefined) behaviour with regards to how the application at + * large terminates. This means that a test-suite for such a debug layer + * cannot reliably detect whether a distributed application has terminated + * for the expected reasons. In this case, #lpf_abort provides a reliable + * mechanism that such a test requires. + * + * @{ + */ + +/** + * Whether the active LPF engine supports aborting distributed applications. + * + * If the value of this field is zero (0), then a call to #lpf_abort will be a + * no-op and always return #LPF_SUCCESS. + */ +extern _LPFLIB_VAR const int LPF_HAS_ABORT ; + +/** + * A call to this function aborts the distributed application as soon as + * possible. + * + * \warning This function corresponds to a no-op if #LPF_HAS_ABORT equals zero. + * + * The below specification only applies when #LPF_HAS_ABORT contains a non-zero + * value; otherwise, a call to this function will have no other effect besides + * returning #LPF_SUCCESS. + * + * \note Rationale: the capability to abort relies on the software stack that + * underlies LPF, and in aiming to be a minimal API, LPF does not wish to + * force such a capabilities unto the underlying software or system. + * + * \note Applications that rely on #lpf_abort therefore should first check if + * the capability is supported. + * + * \note The recommended way to abort LPF applications that is fully supported + * by the core specification alone (i.e., excluding this #lpf_abort + * extension), is to simply exit the process that should be aborted. + * Compliant LPF implementations will then quit sibling processes at + * latest at a call to #lpf_sync that should handle communications + * with the exited process. Sibling processes may also exit early without + * involvement of LPF. In all cases, the parent call to #lpf_exec, + * #lpf_hook, or #lpf_rehook should return with #LPF_ERR_FATAL. + * + * \warning Therefore, whenever possible, code implemented on top of LPF ideally + * does not rely on #lpf_abort. Instead, error handling more reliably + * could be implemented on top of the above-described default LPF + * behaviour. + * + * The call to #lpf_abort differs from the stdlib abort; for example, + * implementations are not required to raise SIGABRT as part of a call to + * #lpf_abort. Instead, the requirements are that: + * 1. processes that call this function terminate during the call to + * #lpf_abort; + * 2. all other processes associated with the distributed application terminate + * at latest during a next call to #lpf_sync that should have handled + * communications with any aborted process; + * 3. regardless of whether LPF aborted sibling processes, whether they exited + * gracefully, or whether they also called #lpf_abort, the process(es) which + * made the parent call to #lpf_exec, #lpf_hook, or #lpf_rehook should + * either: a) terminate also, at latest when all (other) associated + * processes have terminated, (exclusive-)or b) return #LPF_ERR_FATAL. + * Which behaviour (a or b) will be followed is up to the implementation, + * and portable applications should account for both possibilities. + * + * \note In the above, \em other is between parenthesis since the processes + * executing the application may be fully disjoint from the process that + * spawned the application. In this case it is natural to elect that the + * spawning process returns #LPF_ERR_FATAL, though under this + * specification also that process may be aborted before the spawning + * call returns. + * + * \note If one of the associated processes deadlock (e.g. due to executing + * while(1){}), it shall remain undefined when the entire + * application aborts. Implementations shall make a best effort to do this + * as early as possible. + * + * \note Though implied by the above, we note explicitly that #lpf_abort is + * \em not a collective function; a single process calling #lpf_abort can + * terminate all associated processes. + * + * @returns #LPF_SUCCESS If and only if #LPF_HAS_ABORT equals zero. + * + * If #LPF_HAS_ABORT is nonzero, then this function shall not return. + */ +extern _LPFLIB_API +lpf_err_t lpf_abort(lpf_t ctx); + +/** + * @} + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/lpf/core.h b/include/lpf/core.h index c9a7f921..0d4434e2 100644 --- a/include/lpf/core.h +++ b/include/lpf/core.h @@ -126,6 +126,8 @@ * - \ref LPF_EXTENSIONS * - \ref LPF_PTHREAD * - \ref LPF_MPI + * - \ref LPF_HYBRID + * - \ref LPF_ABORT * - \ref LPF_HL * - \ref LPF_BSPLIB * - \ref LPF_COLLECTIVES @@ -988,7 +990,7 @@ typedef struct lpf_machine { * both bounds are inclusive. * \param[in] min_msg_size A byte size value that is larger or equal to 0. * \param[in] attr A #lpf_sync_attr_t value. When in doubt, always - * use #LPF_SYNC_DEFAULT + * use #LPF_SYNC_DEFAULT. * * \returns The guaranteed value for the message gap given an LPF SPMD * section using \a p processes, for a superstep in which a user @@ -2425,6 +2427,15 @@ lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t *rcvd_msgs); extern _LPFLIB_API lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t *sent_msgs, lpf_memslot_t slot); +/** + * This function returns in @rcvd_msgs the total received message count. + * It is only implemented for the zero backend (on Infiniband) + * \param[in] ctx The LPF context + * \param[out] sent_msgs Sent message count + */ +extern _LPFLIB_API +lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t *sent_msgs); + /** * This function blocks until all the scheduled messages via * ibv_post_send are completed (via ibv_poll_cq). This includes diff --git a/include/lpf/hybrid.h b/include/lpf/hybrid.h index 00845f08..4c324adf 100644 --- a/include/lpf/hybrid.h +++ b/include/lpf/hybrid.h @@ -28,7 +28,7 @@ extern "C" { * * @{ * - * \defgroup LPF_HYBRID Specific to Hybrid implementation + * \defgroup LPF_HYBRID Specific to the hybrid engine * * @{ */ diff --git a/include/lpf/noc.h b/include/lpf/noc.h new file mode 100644 index 00000000..8949f6e7 --- /dev/null +++ b/include/lpf/noc.h @@ -0,0 +1,472 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LPFLIB_NOC_H +#define LPFLIB_NOC_H + +// import size_t data type for the implementation +#ifndef DOXYGEN + +#ifdef __cplusplus +#include +#else +#include +#endif + +#include + +#endif // DOXYGEN + + +#ifdef __cplusplus +extern "C" { +#endif + +/** \addtogroup LPF_EXTENSIONS LPF API extensions + * + * @{ + * + * \defgroup LPF_NOC Extensions to LPF where it need not maintain consistency. + * + * This extension specifies facilities for (de-)registering memory slots, + * registering RDMA requests, and fencing RDMA requests. These extensions are, + * as far as possible, fully compatible with the core LPF definitions. These + * include LPF contexts (#lpf_t), processor count types (#lpf_pid_t), memory + * slot types (#lpf_memslot_t), and message attributes (#lpf_msg_attr_t). + * + * In this extension, LPF does not maintain consistency amongst processes that + * (de-)register memory slots while RDMA communication may occur. Maintaining + * the required consistency instead becomes the purview of the user. This + * extension specificies exactly what consistency properties the user must + * guarantee. + * + * \warning If LPF is considered a tool for the so-called hero + * programmer, then please note that this variant is even harder + * to program with. + * + * \note At present, no debug layer exists for this extension. It is unclear if + * such a debug layer is even possible (precisely because LPF in this + * extension does not maintain consistency, there is no way a debug layer + * could enforce it). + * + * @{ + */ + + +/** + * The version of this no-conflict LPF specification. All implementations shall + * define this macro. The format is YYYYNN, where YYYY is the year the + * specification was released, and NN the number of the specifications released + * before this one in the same year. + */ +#define _LPF_NOC_VERSION 202400L + +/** + * Resizes the memory register for non-coherent RDMA. + * + * After a successful call to this function, the local process has enough + * resources to register \a max_regs memory regions in a non-coherent way. + * + * Each registration via lpf_noc_register() counts as one. Such registrations + * remain taking up capacity in the register until they are released via a call + * to lpf_noc_deregister(), which lowers the count of used memory registerations + * by one. + * + * There are no runtime out-of-bounds checks prescribed for lpf_noc_register()-- + * this would also be too costly as error checking would require communication. + * + * If memory allocation were successful, the return value is #LPF_SUCCESS and + * the local process may assume the new buffer size \a max_regs. + * + * In the case of insufficient local memory the return value will be + * #LPF_ERR_OUT_OF_MEMORY. In that case, it is as if the call never happened and + * the user may retry the call locally after freeing up unused resources. Should + * retrying not lead to a successful call, the programmer may opt to broadcast + * the error (using existing slots) or to give up by returning from the spmd + * section. + * + * \note The current maximum cannot be retrieved from the runtime. Instead, the + * programmer must track this information herself. To provide + * encapsulation, see lpf_rehook(). + * + * \note When the given memory register capacity is smaller than the current + * capacity, the runtime is allowed but not required to release the + * allocated memory. Such a call shall always be successful and return + * #LPF_SUCCESS. + * + * \note This means that an implementation that allows shrinking the given + * capacity must also ensure the old buffer remains intact in case there + * is not enough memory to allocate a smaller one. + * + * \note The last invocation of lpf_noc_resize_memory_register() determines the + * maximum number of memory registrations using lpf_noc_register() that + * can be maintained concurrently. + * + * \par Thread safety + * This function is safe to be called from different LPF processes only. Any + * further thread safety may be guaranteed by the implementation, but is not + * specified. Similar conditions hold for all LPF primitives that take an + * argument of type #lpf_t; see #lpf_t for more information. + * + * \param[in,out] ctx The runtime state as provided by lpf_exec(). + * \param[in] max_regs The requested maximum number of memory regions that can + * be registered. This value must be the same on all + * processes. + * + * \returns #LPF_SUCCESS + * When this process successfully acquires the resources. + * + * \returns #LPF_ERR_OUT_OF_MEMORY + * When there was not enough memory left on the heap. In this case + * the effect is the same as when this call did not occur at all. + * + * \par BSP costs + * None + * + * See also \ref BSPCOSTS. + * + * \par Runtime costs + * \f$ \Theta( \mathit{max\_regs} ) \f$. + */ +extern _LPFLIB_API +lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs ); + +/** + * Registers a local memory area, preparing its use for intra-process + * communication. + * + * The registration process is necessary to enable Remote Direct Memory Access + * (RDMA) primitives, such as lpf_get() and lpf_put(). + * + * This is \em not a collective function. For #lpf_get and #lpf_put, the memory + * slot returned by this function is equivalent to a memory slot returned by + * #lpf_register_local; the \a memslot returned by a successful call to this + * function (hence) is immediately valid. A successful call (hence) immediately + * consumes one memory slot capacity; see also #lpf_resize_memory_register on + * how to ensure sufficient capacity. + * + * Different from a memory slot returned by #lpf_register_local, a memory slot + * returned by a successful call to this function may serve as either a local + * or remote memory slot for #lpf_noc_put and #lpf_noc_get. + * + * Use of the returned memory slot to indicate a remote memory area may only + * occur by copying the returned memory slot to another LPF process. This may + * be done using the standard #lpf_put and #lpf_get methods or by using + * auxiliary communication mechanisms. The memory slot thus communicated only + * refers to a valid memory area on the process it originated from; any other + * use leads to undefined behaviour. + * + * \note Note that the ability to copy memory slots to act as identifiers of + * remote areas exploits the LPF core specification that instances of + * the #lpf_memslot_t type are, indeed, byte-copyable. + * + * A memory slot returned by a successful call to this function may be + * destroyed via a call to the standard #lpf_deregister. The deregistration + * takes effect immediately. No communication using the deregistered slot + * should occur during that superstep, or otherwise undefined behaviour occurs. + * + * Only the process that created the returned memory slot can destroy it; other + * LPF processes than the one which created it that attempt to destroy the + * returned memory slot, invoke undefined behaviour. + * + * Other than the above specified differences, the arguments to this function + * are the same as for #lpf_register_local: + * + * \param[in,out] ctx The runtime state as provided by lpf_exec(). + * \param[in] pointer The pointer to the memory area to register. + * \param[in] size The size of the memory area to register in bytes. + * \param[out] memslot Where to store the memory slot identifier. + * + * \note Registering a slot with zero \a size is valid. The resulting memory + * slot cannot be written to nor read from by remote LPF processes. + * + * \note In particular, passing \c NULL as \a pointer and \c 0 for \a size is + * valid. + * + * \returns #LPF_SUCCESS + * Successfully registered the memory region and successfully + * assigned a memory slot identifier. + * + * \note One registration consumes one memory slot from the pool of locally + * available memory slots, which must have been preallocated by + * lpf_resize_memory_register() or recycled by lpf_deregister(). Always + * use lpf_resize_memory_register() at the start of the SPMD function + * that is executed by lpf_exec(), since lpf_exec() itself does not + * preallocate slots. + * + * \note It is illegal to request more memory slots than have previously been + * registered with lpf_resize_memory_register(). There is no runtime + * check for this error, because a safe way out cannot be guaranteed + * without significant parallel error checking overhead. + * + * \par Thread safety + * This function is safe to be called from different LPF processes only. Any + * further thread safety may be guaranteed by the implementation, but is not + * specified. Similar conditions hold for all LPF primitives that take an + * argument of type #lpf_t; see #lpf_t for more information. + * + * \par BSP costs + * + * None. + * + * \par Runtime costs + * + * \f$ \mathcal{O}( \texttt{size} ) \f$. + * + * \note This asymptotic bound may be attained for implementations that require + * linear-time processing on the registered memory area, such as to effect + * memory pinning. If this is not required, a good implementation will + * require only \f$ \Theta(1) \f$ time. + */ +extern _LPFLIB_API +lpf_err_t lpf_noc_register( + lpf_t ctx, + void * pointer, + size_t size, + lpf_memslot_t * memslot +); + +/** + * Deregisters a memory area previously registered using lpf_noc_register(). + * + * After a successful deregistration, the slot is returned to the pool of free + * memory slots. The total number of memory slots may be set via a call to + * lpf_noc_resize_memory_register(). + * + * Deregistration takes effect immediately. A call to this function is not + * collective, and the other of deregistration does not need to match the order + * of registration. Any local or remote communication using the given \a memslot + * in the current superstep invokes undefined behaviour. + * + * \par Thread safety + * This function is safe to be called from different LPF processes only. Any + * further thread safety may be guaranteed by the implementation, but is not + * specified. Similar conditions hold for all LPF primitives that take an + * argument of type #lpf_t; see #lpf_t for more information. + * + * \param[in,out] ctx The runtime state as provided by lpf_exec(). + * \param[in] memslot The memory slot identifier to de-register. + * + * \returns #LPF_SUCCESS + * Successfully deregistered the memory region. + * + * \par BSP costs + * None. + * + * \par Runtime costs + * \f$ \mathcal{O}(n) \f$, where \f$ n \f$ is the size of the memory region + * corresponding to \a memslot. + */ +extern _LPFLIB_API +lpf_err_t lpf_noc_deregister( + lpf_t ctx, + lpf_memslot_t memslot +); + +/** + * Copies contents of local memory into the memory of remote processes. + * + * This operation is guaranteed to be completed after a call to the next + * lpf_sync() exits. + * + * Until that time it occupies one entry in the operations queue. + * + * Concurrent reads or writes from or to the same memory area are + * allowed in the same way they are for the core primitive #lpf_put. + * + * This primitive differs from #lpf_put in that the \a dst_slot may be the + * result of a successful call to #lpf_noc_register, while \a src_slot \em must + * be the results of such a successful call. In both cases, the slot need + * \em not have been registered before the last call to #lpf_sync. + * + * \par Thread safety + * This function is safe to be called from different LPF processes only. Any + * further thread safety may be guaranteed by the implementation, but is not + * specified. Similar conditions hold for all LPF primitives that take an + * argument of type #lpf_t; see #lpf_t for more information. + * + * \param[in,out] ctx The runtime state as provided by lpf_exec() + * \param[in] src_slot The memory slot of the local source memory area + * registered using lpf_register_local(), + * lpf_register_global(), or lpf_noc_register() + * \param[in] src_offset The offset of reading out the source memory area, + * w.r.t. the base location of the registered area + * expressed in bytes. + * \param[in] dst_pid The process ID of the destination process. + * \param[in] dst_slot The memory slot of the destination memory area at + * \a pid, registered using lpf_register_global() or + * lpf_noc_register(). + * \param[in] dst_offset The offset of writing to the destination memory area + * w.r.t. the base location of the registered area + * expressed in bytes. + * \param[in] size The number of bytes to copy from the source memory area + * to the destination memory area. + * \param[in] attr + * \parblock + * In case an \a attr not equal to #LPF_MSG_DEFAULT is provided, the + * the message created by this function may have modified semantics + * that may be used to extend this API. Examples include: + * + * -# delaying the superstep deadline of delivery, and/or + * -# DRMA with message combining semantics. + * + * These attributes are stored after a call to this function has + * completed and may be modified immediately after without affecting + * any messages already scheduled. + * \endparblock + * + * \note See #lpf_put for notes regarding #lpf_msg_attr_t. + * + * \returns #LPF_SUCCESS + * When the communication request was recorded successfully. + * + * \par BSP costs + * This function will increase + * \f$ t_{c}^{(s)} \f$ + * and + * \f$ r_{c}^{(\mathit{pid})} \f$ + * by \a size, where c is the current superstep number and s is this process ID + * (as provided by #lpf_exec)). See \ref BSPCOSTS on how this affects real-time + * communication costs. + * + * \par Runtime costs + * See \ref BSPCOSTS. + */ +extern _LPFLIB_API +lpf_err_t lpf_noc_put( + lpf_t ctx, + lpf_memslot_t src_slot, + size_t src_offset, + lpf_pid_t dst_pid, + lpf_memslot_t dst_slot, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +); + +/** + * Copies contents from remote memory to local memory. + * + * This operation completes after one call to lpf_sync(). + * + * Until that time it occupies one entry in the operations queue. + * + * Concurrent reads or writes from or to the same memory area are allowed in the + * same way it is for #lpf_get. + * + * This primitive differs from #lpf_get in that the \a src_slot may be the + * result of a successful call to #lpf_noc_register, while \a dst_slot \em must + * be the results of such a successful call. In both cases, the slot need + * \em not have been registered before the last call to #lpf_sync. + * + * \par Thread safety + * This function is safe to be called from different LPF processes only. Any + * further thread safety may be guaranteed by the implementation, but is not + * specified. Similar conditions hold for all LPF primitives that take an + * argument of type #lpf_t; see #lpf_t for more information. + * + * \param[in,out] ctx The runtime state as provided by lpf_exec(). + * \param[in] src_pid The process ID of the source process. + * \param[in] src_slot The memory slot of the source memory area at \a pid, as + * globally registered with lpf_register_global() or + * lpf_noc_register(). + * \param[in] src_offset The offset of reading out the source memory area, + * w.r.t. the base location of the registered area + * expressed in bytes. + * \param[in] dst_slot The memory slot of the local destination memory area + * registered using lpf_register_local(), + * lpf_register_global(), or lpf_noc_register(). + * \param[in] dst_offset The offset of writing to the destination memory area + * w.r.t. the base location of the registered area + * expressed in bytes. + * \param[in] size The number of bytes to copy from the source + * remote memory location. + * \param[in] attr + * \parblock + * In case an \a attr not equal to #LPF_MSG_DEFAULT is provided, the + * the message created by this function may have modified semantics + * that may be used to extend this API. Examples include: + * + * -# delaying the superstep deadline of delivery, and/or + * -# DRMA with message combining semantics. + * + * These attributes are stored after a call to this function has + * completed and may be modified immediately after without affecting + * any messages already scheduled. + * \endparblock + * + * \note See #lpf_get for notes on the use of #lpf_msg_attr_t. + * + * \returns #LPF_SUCCESS + * When the communication request was recorded successfully. + * + * \par BSP costs + * This function will increase + * \f$ r_{c}^{(s)} \f$ + * and + * \f$ t_{c}^{(\mathit{pid})} \f$ + * by \a size, where c is the current superstep number and s is this process ID + * (as provided via lpf_exec(). See \ref BSPCOSTS on how this affects real-time + * communication costs. + * + * \par Runtime costs + * See \ref BSPCOSTS. + */ +extern _LPFLIB_API +lpf_err_t lpf_noc_get( + lpf_t ctx, + lpf_pid_t src_pid, + lpf_memslot_t src_slot, + size_t src_offset, + lpf_memslot_t dst_slot, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +); + +extern _LPFLIB_API +lpf_err_t lpf_noc_serialize_slot( + lpf_t ctx, + lpf_memslot_t slot, + char ** buff, + size_t * buff_size +); +/* + * lpf_deserialize_slot may only be called on a slot + * already registered via lpf_noc_register. + * This call sets the memory registration attributes from + * the byte array buff with byte size buff_size. + * This array must have been created via a call to + * @lpf_serialize_slot + */ +extern _LPFLIB_API + lpf_err_t lpf_noc_deserialize_slot( + lpf_t ctx, + char * buff, + lpf_memslot_t slot +); +/** + * @} + * + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/lpf/pthread.h b/include/lpf/pthread.h index ba68f3f8..454eaf0e 100644 --- a/include/lpf/pthread.h +++ b/include/lpf/pthread.h @@ -28,7 +28,7 @@ extern "C" { * * @{ * - * \defgroup LPF_PTHREAD Specific to Pthreads + * \defgroup LPF_PTHREAD Specific to the Pthreads engine * * @{ */ diff --git a/include/lpf/static_dispatch.h b/include/lpf/static_dispatch.h index 8816f9e9..71d65526 100644 --- a/include/lpf/static_dispatch.h +++ b/include/lpf/static_dispatch.h @@ -45,6 +45,7 @@ #undef lpf_register_local #undef lpf_get_rcvd_msg_count #undef lpf_get_rcvd_msg_count_per_slot +#undef lpf_get_sent_msg_count #undef lpf_get_sent_msg_count_per_slot #undef lpf_register_global #undef lpf_flush_sent @@ -85,6 +86,7 @@ #undef LPF_NONE #undef LPF_INIT_NONE #undef LPF_NO_ARGS +#undef LPF_HAS_ABORT #ifdef LPF_FUNC @@ -96,6 +98,7 @@ #define lpf_register_local LPF_FUNC(register_local) #define lpf_get_rcvd_msg_count LPF_FUNC(get_rcvd_msg_count) #define lpf_get_rcvd_msg_count_per_slot LPF_FUNC(get_rcvd_msg_count_per_slot) +#define lpf_get_sent_msg_count LPF_FUNC(get_sent_msg_count) #define lpf_get_sent_msg_count_per_slot LPF_FUNC(get_sent_msg_count_per_slot) #define lpf_flush_sent LPF_FUNC(flush_sent) #define lpf_flush_received LPF_FUNC(flush_received) @@ -136,6 +139,7 @@ #define LPF_NONE LPF_CONST(NONE) #define LPF_INIT_NONE LPF_CONST(INIT_NONE) #define LPF_NO_ARGS LPF_CONST(NO_ARGS) +#define LPF_HAS_ABORT LPF_CONST(HAS_ABORT) #endif diff --git a/lpfcc.in b/lpfcc.in index b1a89659..b58da83a 100644 --- a/lpfcc.in +++ b/lpfcc.in @@ -187,6 +187,32 @@ do shift ;; + # The below two special cases are to ensure good integration with CMake. + # Note that the arguments that follow -MT and -MQ are object files, which + # otherwise would be appended to the objects list. In case of manual + # usage of lpfcc, therefore, the use of these flags should come after --, + # however, it is unclear how to pass that to CMake and we choose this + # solution instead (nor would that be desired-- ideally, lpfcc can act as + # a "regular" CC from the CMake perspective) + + -MT) + other_args[$arg_number]="-MT" + arg_number=$((arg_number + 1)) + shift + other_args[$arg_number]="$arg" + arg_number=$((arg_number + 1)) + shift + ;; + + -MQ) + other_args[$arg_number]="-MQ" + arg_number=$((arg_number + 1)) + shift + other_args[$arg_number]="$arg" + arg_number=$((arg_number + 1)) + shift + ;; + *) case $state in engine) diff --git a/post-install/cmake-module-test/src/CMakeLists.txt b/post-install/cmake-module-test/src/CMakeLists.txt index fe1ae2a8..eeef8252 100644 --- a/post-install/cmake-module-test/src/CMakeLists.txt +++ b/post-install/cmake-module-test/src/CMakeLists.txt @@ -15,7 +15,7 @@ # limitations under the License. # -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.10) project(findlpf_test) find_package(lpf REQUIRED CONFIG) diff --git a/post-install/func_lpf_hook_subset.mpimsg.cpp b/post-install/func_lpf_hook_subset.mpimsg.cpp new file mode 100644 index 00000000..6b7d3a5c --- /dev/null +++ b/post-install/func_lpf_hook_subset.mpimsg.cpp @@ -0,0 +1,67 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + + +const int LPF_MPI_AUTO_INITIALIZE=0; + +void test_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args) +{ + (void) ctx; + (void) pid; + (void) nprocs; + (void) args; + return; +} + +void subset_func(MPI_Comm comm) +{ + MPI_Barrier(comm); + + lpf_init_t init; + lpf_err_t rc = lpf_mpi_initialize_with_mpicomm(comm, &init); + + rc = lpf_hook(init, test_spmd, LPF_NO_ARGS); +} + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + + int s; + MPI_Comm_rank(MPI_COMM_WORLD, &s); + + int subset = s < 2; // Processes are divided into 2 subsets {0,1} and {2,...,p-1} + + MPI_Comm subset_comm; + MPI_Comm_split(MPI_COMM_WORLD, subset, s, &subset_comm); + +// only the first subset enters that function + if (subset) + { + subset_func(subset_comm); + } + + MPI_Barrier(MPI_COMM_WORLD); // Paranoid barrier + + MPI_Finalize(); + +} diff --git a/post-install/post-install-test.cmake.in b/post-install/post-install-test.cmake.in index 65c9ef9f..75c5de13 100644 --- a/post-install/post-install-test.cmake.in +++ b/post-install/post-install-test.cmake.in @@ -268,12 +268,12 @@ if (MPI_FOUND) endif() - message("Compiling a simple LPF program with mpimsg engine") + message("Compiling a simple MPI LPF program with mpimsg engine") # Compile this to check whether mpi.h can be found execute_process( - COMMAND @bindir@/lpfcc -engine mpimsg -I@common@ - @testdir@/func_lpf_hook_subset.mpimsg.c - -o lpfhook_subset_mpimsg_cc + COMMAND @bindir@/lpfcxx -engine mpimsg -I@common@ + @srcdir@/func_lpf_hook_subset.mpimsg.cpp -c + -o lpfhook_subset_mpimsg_cc.o WORKING_DIRECTORY @builddir@ RESULT_VARIABLE status ) @@ -353,6 +353,9 @@ endif() ###### CMake integration using generated CMake module file ############ foreach(engine @ENGINES@) + if ("${engine}" STREQUAL "zero") + continue() + endif() message("Testing generated CMake module files for engine ${engine}") set(test_dir @builddir@/cmake-module-test-${engine}) diff --git a/post-install/test-lpf-nprocs.c b/post-install/test-lpf-nprocs.c index cf274b3f..554b5775 100644 --- a/post-install/test-lpf-nprocs.c +++ b/post-install/test-lpf-nprocs.c @@ -53,6 +53,8 @@ void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) lpf_memslot_t mem_slot = LPF_INVALID_MEMSLOT; lpf_register_global( lpf, mem, nprocs, &mem_slot ); + lpf_sync(lpf, LPF_SYNC_DEFAULT); + if (pid != 0) lpf_get( lpf, 0, params_slot, 0, params_slot, 0, sizeof(params), LPF_MSG_DEFAULT ); diff --git a/src/MPI/CMakeLists.txt b/src/MPI/CMakeLists.txt index 9633d1d5..636b243c 100644 --- a/src/MPI/CMakeLists.txt +++ b/src/MPI/CMakeLists.txt @@ -35,27 +35,25 @@ if (MPI_FOUND) set_target_properties( lpf_proxy_dummy PROPERTIES LINK_FLAGS "${MPI_C_LINK_FLAGS}" ) target_include_directories( lpf_proxy_dummy PRIVATE ${MPI_C_INCLUDE_PATH}) - target_compile_flags(lpf_proxy_dummy PRIVATE ${MPI_C_COMPILE_FLAGS}) install( TARGETS lpf_proxy_dummy RUNTIME DESTINATION ${INSTALL_HELPERS} ) - set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME}) -# univ_ stands for universal interface => lpf_exec, lpf_put, etc... -# spec_ stands for specific interface => lpf_mpimsg_release_exec, lpf_mpimsg_release_put, etc... + # univ_ stands for universal interface => lpf_exec, lpf_put, etc... + # spec_ stands for specific interface => lpf_mpimsg_release_exec, lpf_mpimsg_release_put, etc... foreach (iface "univ_" "spec_" ) - foreach (LPF_IMPL_ID ${MPI_ENGINES}) - set(libname "lpf_core_${iface}${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}") - set(comlib "lpf_common_${LPFLIB_CONFIG_NAME}") - - set(ibverbs_sources) - if (LPF_IMPL_ID STREQUAL ibverbs) + foreach (LPF_IMPL_ID ${MPI_ENGINES}) + set(libname "lpf_core_${iface}${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}") + set(comlib "lpf_common_${LPFLIB_CONFIG_NAME}") + + set(ibverbs_sources) + if (LPF_IMPL_ID STREQUAL ibverbs) set(ibverbs_sources ibverbs.cpp) endif() if (LPF_IMPL_ID STREQUAL zero) - set(ibverbs_sources ibverbsZero.cpp) + set(ibverbs_sources ibverbsZero.cpp ibverbsNoc.cpp) endif() add_library(raw_${libname} OBJECT @@ -71,15 +69,13 @@ if (MPI_FOUND) spall2all.c messagesort.cpp spall2all.cpp - init.cpp + init.cpp ${ibverbs_sources} ) target_compile_flags(raw_${libname} - PUBLIC ${MPI_C_COMPILE_FLAGS} - INTERFACE "-fPIC" - ) + INTERFACE "-fPIC") target_compile_definitions(raw_${libname} PRIVATE "LPF_CORE_MPI_USES_${LPF_IMPL_ID}=1" @@ -107,9 +103,7 @@ if (MPI_FOUND) MACOSX_RPATH TRUE) target_compile_flags(${libname} - PUBLIC ${MPI_C_COMPILE_FLAGS} - INTERFACE "-fPIC" - ) + INTERFACE "-fPIC") if (iface STREQUAL "spec_") target_compile_definitions(${libname} @@ -175,31 +169,43 @@ if (MPI_FOUND) ${CMAKE_CURRENT_SOURCE_DIR}/dynamichook.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp) - configure_file( dynamichook.t.sh.in dynamichook.t.sh @ONLY) - set( dynamic_hook_t_sh "${CMAKE_CURRENT_BINARY_DIR}/dynamichook.t.sh") - add_test(NAME dynamichook_1proc - COMMAND bash ${dynamic_hook_t_sh} 1) - set_tests_properties( dynamichook_1proc PROPERTIES TIMEOUT 30 ) - add_test(NAME dynamichook_2proc - COMMAND bash ${dynamic_hook_t_sh} 2) - set_tests_properties( dynamichook_2proc PROPERTIES TIMEOUT 30 ) - add_test(NAME dynamichook_3proc - COMMAND bash ${dynamic_hook_t_sh} 3) - set_tests_properties( dynamichook_3proc PROPERTIES TIMEOUT 30 ) - add_test(NAME dynamichook_10proc - COMMAND bash ${dynamic_hook_t_sh} 10) - set_tests_properties( dynamichook_10proc PROPERTIES TIMEOUT 30 ) + configure_file( dynamichook.t.sh.in dynamichook.t.sh @ONLY) + set( dynamic_hook_t_sh "${CMAKE_CURRENT_BINARY_DIR}/dynamichook.t.sh") + add_test(NAME dynamichook_1proc + COMMAND bash ${dynamic_hook_t_sh} 1) + # We set all dynamichook tests to run in serial mode, without any other tests, + # since these tests occupy the same port and would block each other + set_tests_properties( dynamichook_1proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE) + add_test(NAME dynamichook_2proc + COMMAND bash ${dynamic_hook_t_sh} 2) + set_tests_properties( dynamichook_2proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE) + add_test(NAME dynamichook_3proc + COMMAND bash ${dynamic_hook_t_sh} 3) + set_tests_properties( dynamichook_3proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE) + add_test(NAME dynamichook_10proc + COMMAND bash ${dynamic_hook_t_sh} 10) + set_tests_properties( dynamichook_10proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE) endif() # Other unit tests if (ENABLE_IBVERBS AND LPF_ENABLE_TESTS) add_gtest( ibverbs_test "ibverbs" ON ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.t.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp) + ibverbs.cpp mpilib.cpp) add_gtest( zero_test "zero" ON ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.t.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ibverbsZero.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp) + + # NOC test for HiCR + set(mode "") + set(LPF_IMPL_ID "zero") + set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME}) + set(exeName "func_verbs_test_noc_register_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") + add_gtest(${exeName} ${LPF_IMPL_ID} ON ${CMAKE_CURRENT_SOURCE_DIR}/func_verbs_test_noc_register.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ibverbsZero.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ibverbsNoc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp) + endif() foreach (engine ${MPI_ENGINES}) @@ -229,4 +235,3 @@ if (MPI_FOUND) endif(MPI_FOUND) - diff --git a/src/MPI/core.cpp b/src/MPI/core.cpp index 4340bd27..dc3f0a0f 100644 --- a/src/MPI/core.cpp +++ b/src/MPI/core.cpp @@ -15,8 +15,10 @@ * limitations under the License. */ +#include #include #include +#include #include #include @@ -36,6 +38,11 @@ #include + +// the value 2 in this implementation indicates support for lpf_abort in a way +// that may deviate from the stdlib abort() +const int LPF_HAS_ABORT = 2; + // Error codes. // Note: Some code (e.g. in process::broadcastSymbol) depends on the // fact that numbers are assigned in order of severity, where 0 means @@ -331,6 +338,15 @@ lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t * rcvd_msgs) return LPF_SUCCESS; } +lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t * sent_msgs) +{ + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) { + i->getSentMsgCount(sent_msgs); + } + return LPF_SUCCESS; +} + lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_msgs, lpf_memslot_t slot) { lpf::Interface * i = realContext(ctx); @@ -392,4 +408,106 @@ lpf_err_t lpf_abort( lpf_t ctx ) { return LPF_SUCCESS; } +lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs ) +{ + lpf::Interface * i = realContext(ctx); + if (i->isAborted()) + return LPF_SUCCESS; + + return i->nocResizeMemreg(max_regs); +} + +lpf_err_t lpf_noc_register( + lpf_t ctx, + void * pointer, + size_t size, + lpf_memslot_t * memslot +) +{ + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + *memslot = i->nocRegister(pointer, size); + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_deregister( + lpf_t ctx, + lpf_memslot_t memslot +) +{ + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + i->nocDeregister(memslot); + + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_put( + lpf_t ctx, + lpf_memslot_t src_slot, + size_t src_offset, + lpf_pid_t dst_pid, + lpf_memslot_t dst_slot, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +) +{ + (void) attr; // ignore parameter 'msg' since this implementation only + // implements core functionality + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + i->nocPut( src_slot, src_offset, dst_pid, dst_slot, dst_offset, size ); + + return LPF_SUCCESS; + +} + +lpf_err_t lpf_noc_get( + lpf_t ctx, + lpf_pid_t pid, + lpf_memslot_t src, + size_t src_offset, + lpf_memslot_t dst, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +) +{ + (void) attr; // ignore parameter 'msg' since this implementation only + // implements core functionality + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + i->nocGet( pid, src, src_offset, dst, dst_offset, size ); + + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_serialize_slot( + lpf_t ctx, + lpf_memslot_t slot, + char ** buff, + size_t * buff_size +) +{ + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + return i->serializeSlot(slot, buff, buff_size); + + return LPF_ERR_FATAL; +} + +lpf_err_t lpf_noc_deserialize_slot( + lpf_t ctx, + char * buff, + lpf_memslot_t slot +) +{ + lpf::Interface * i = realContext(ctx); + if (!i->isAborted()) + return i->deserializeSlot( buff, slot); + + return LPF_ERR_FATAL; + +} diff --git a/src/MPI/func_verbs_test_noc_register.cpp b/src/MPI/func_verbs_test_noc_register.cpp new file mode 100644 index 00000000..6e9ad17f --- /dev/null +++ b/src/MPI/func_verbs_test_noc_register.cpp @@ -0,0 +1,86 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ibverbsNoc.hpp" +#include "mpilib.hpp" +#include +#include "gtest/gtest.h" + + +using namespace lpf::mpi; + +extern "C" const int LPF_MPI_AUTO_INITIALIZE=0; + + +/** + * \test Testing NOC functionality + * \pre P >= 2 + * \pre P <= 2 + * \return Exit code: 0 + */ +TEST( API, func_verbsAPI_zero_test_noc_ring ) +{ + + char buf1[30] = {'\0'}; + char buf2[30] = {'\0'}; + + strcpy(buf1, "HELLO"); + + MPI_Init(NULL, NULL); + Lib::instance(); + Comm * comm = new Comm(); + *comm = Lib::instance().world(); + int rank = comm->pid(); + assert(comm->nprocs() > 0); + comm->barrier(); + IBVerbsNoc * verbs = new IBVerbsNoc( *comm ); + + verbs->resizeMemreg(3); + comm->barrier(); + + verbs->resizeMesgq( 2 ); + comm->barrier(); + + IBVerbs::SlotID b1 = verbs->regLocal( buf1, sizeof(buf1) ); + IBVerbs::SlotID b2 = verbs->regNoc( buf2, sizeof(buf2) ); + + auto mr = verbs->getMR(b1, rank); + mr = verbs->getMR(b2, rank); + assert(mr._addr != nullptr); + char * buffer; + size_t bufSize = mr.serialize(&buffer); + std::string bufAsString(buffer); + + int left = (comm->nprocs() + rank - 1) % comm->nprocs(); + int right = (rank + 1) % comm->nprocs(); + char rmtBuff[bufSize]; + std::stringstream ss(buffer); + + MPI_Sendrecv(buffer, bufSize, MPI_BYTE, left, 0, rmtBuff, bufSize, MPI_BYTE, right, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + MemoryRegistration * newMr = MemoryRegistration::deserialize(rmtBuff); + verbs->setMR(b2, right, *newMr); + comm->barrier(); + verbs->put( b1, 0, right, b2, 0, sizeof(buf1)); + verbs->sync(true); + EXPECT_EQ(std::string(buf2), std::string(buf1)); + verbs->dereg(b1); + verbs->dereg(b2); + delete verbs; + delete comm; + MPI_Finalize(); +} diff --git a/src/MPI/ibverbs.cpp b/src/MPI/ibverbs.cpp index 5dcdbfc8..77832aae 100644 --- a/src/MPI/ibverbs.cpp +++ b/src/MPI/ibverbs.cpp @@ -45,9 +45,20 @@ namespace { } } +size_t MemoryRegistration :: serialize(char ** buf) { + (void) buf; + throw IBVerbs::Exception( "MemoryRegistration::serialize(char ** buf) not implemented for base IBVerbs class"); +} + +MemoryRegistration * MemoryRegistration :: deserialize(char * buf) +{ + (void) buf; + throw IBVerbs::Exception( "MemoryRegistration::deserialize(char * buf) not implemented for base IBVerbs class"); +} IBVerbs :: IBVerbs( Communication & comm ) - : m_pid( comm.pid() ) + : m_comm( comm ) + , m_pid( comm.pid() ) , m_nprocs( comm.nprocs() ) , m_devName() , m_ibPort( Config::instance().getIBPort() ) @@ -72,7 +83,6 @@ IBVerbs :: IBVerbs( Communication & comm ) , m_memreg() , m_dummyMemReg() , m_dummyBuffer() - , m_comm( comm ) { m_peerList.reserve( m_nprocs ); @@ -97,7 +107,6 @@ IBVerbs :: IBVerbs( Communication & comm ) throw Exception( "No Infiniband devices available" ); } - std::string wantDevName = Config::instance().getIBDeviceName(); LOG( 3, "Searching for device '"<< wantDevName << "'" ); struct ibv_device * dev = NULL; @@ -463,8 +472,8 @@ void IBVerbs :: resizeMemreg( size_t size ) throw std::bad_alloc() ; } - MemoryRegistration null = { 0, 0, 0, 0 }; - MemorySlot dflt; dflt.glob.resize( m_nprocs, null ); + MemoryRegistration newMR = { nullptr, 0, 0, 0, m_pid}; + MemorySlot dflt; dflt.glob.resize( m_nprocs, newMR ); m_memreg.reserve( size, dflt ); } @@ -507,11 +516,7 @@ IBVerbs :: SlotID IBVerbs :: regLocal( void * addr, size_t size ) throw Exception("Could not register memory area"); } } - MemoryRegistration local; - local.addr = addr; - local.size = size; - local.lkey = size?slot.mr->lkey:0; - local.rkey = size?slot.mr->rkey:0; + MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid); SlotID id = m_memreg.addLocalReg( slot ); @@ -551,11 +556,7 @@ IBVerbs :: SlotID IBVerbs :: regGlobal( void * addr, size_t size ) // exchange memory registration info globally ref.glob.resize(m_nprocs); - MemoryRegistration local; - local.addr = addr; - local.size = size; - local.lkey = size?slot.mr->lkey:0; - local.rkey = size?slot.mr->rkey:0; + MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid); LOG(4, "All-gathering memory register data" ); @@ -583,13 +584,13 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset, struct ibv_send_wr sr; std::memset(&sr, 0, sizeof(sr)); const char * localAddr - = static_cast(src.glob[m_pid].addr) + srcOffset; + = static_cast(src.glob[m_pid]._addr) + srcOffset; const char * remoteAddr - = static_cast(dst.glob[dstPid].addr) + dstOffset; + = static_cast(dst.glob[dstPid]._addr) + dstOffset; sge.addr = reinterpret_cast( localAddr ); sge.length = std::min(size, m_maxMsgSize ); - sge.lkey = src.mr->lkey; + sge.lkey = src.mr->lkey; m_sges.push_back( sge ); bool lastMsg = ! m_activePeers.contains( dstPid ); @@ -603,7 +604,7 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset, sr.num_sge = 1; sr.opcode = IBV_WR_RDMA_WRITE; sr.wr.rdma.remote_addr = reinterpret_cast( remoteAddr ); - sr.wr.rdma.rkey = dst.glob[dstPid].rkey; + sr.wr.rdma.rkey = dst.glob[dstPid]._rkey; m_srsHeads[ dstPid ] = m_srs.size(); m_srs.push_back( sr ); @@ -632,9 +633,9 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset, struct ibv_send_wr sr; std::memset(&sr, 0, sizeof(sr)); const char * localAddr - = static_cast(dst.glob[m_pid].addr) + dstOffset; + = static_cast(dst.glob[m_pid]._addr) + dstOffset; const char * remoteAddr - = static_cast(src.glob[srcPid].addr) + srcOffset; + = static_cast(src.glob[srcPid]._addr) + srcOffset; sge.addr = reinterpret_cast( localAddr ); sge.length = std::min(size, m_maxMsgSize ); @@ -652,7 +653,7 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset, sr.num_sge = 1; sr.opcode = IBV_WR_RDMA_READ; sr.wr.rdma.remote_addr = reinterpret_cast( remoteAddr ); - sr.wr.rdma.rkey = src.glob[srcPid].rkey; + sr.wr.rdma.rkey = src.glob[srcPid]._rkey; m_srsHeads[ srcPid ] = m_srs.size(); m_srs.push_back( sr ); diff --git a/src/MPI/ibverbs.hpp b/src/MPI/ibverbs.hpp index f53c9354..b165f777 100644 --- a/src/MPI/ibverbs.hpp +++ b/src/MPI/ibverbs.hpp @@ -58,6 +58,23 @@ using std::shared_ptr; using std::tr1::shared_ptr; #endif +class MemoryRegistration { + public: + char * _addr; + size_t _size; + uint32_t _lkey; + uint32_t _rkey; + int _pid; + MemoryRegistration(char * addr, size_t size, uint32_t lkey, uint32_t rkey, int pid) : _addr(addr), + _size(size), _lkey(lkey), _rkey(rkey), _pid(pid) + { } + MemoryRegistration() : _addr(nullptr), _size(0), _lkey(0), _rkey(0), _pid(-1) {} + size_t serialize(char ** buf); + static MemoryRegistration * deserialize(char * buf); + +}; + + class _LPFLIB_LOCAL IBVerbs { public: @@ -72,6 +89,7 @@ class _LPFLIB_LOCAL IBVerbs void resizeMesgq( size_t size ); SlotID regLocal( void * addr, size_t size ); + SlotID regNoc( void * addr, size_t size ); SlotID regGlobal( void * addr, size_t size ); void dereg( SlotID id ); @@ -93,7 +111,7 @@ class _LPFLIB_LOCAL IBVerbs void doRemoteProgress(); - void countingSyncPerSlot(bool resized, SlotID tag, size_t sent, size_t recvd); + void countingSyncPerSlot(SlotID tag, size_t sent, size_t recvd); /** * @syncPerSlot only guarantees that all already scheduled sends (via put), * or receives (via get) associated with a slot are completed. It does @@ -101,16 +119,18 @@ class _LPFLIB_LOCAL IBVerbs * no guarantee that a remote process will wait til data is put into its * memory, as it does schedule the operation (one-sided). */ - void syncPerSlot(bool resized, SlotID slot); + void syncPerSlot(SlotID slot); // Do the communication and synchronize // 'Reconnect' must be a globally replicated value void sync( bool reconnect); void get_rcvd_msg_count(size_t * rcvd_msgs); + void get_sent_msg_count(size_t * sent_msgs); void get_rcvd_msg_count_per_slot(size_t * rcvd_msgs, SlotID slot); void get_sent_msg_count_per_slot(size_t * sent_msgs, SlotID slot); -private: + +protected: IBVerbs & operator=(const IBVerbs & ); // assignment prohibited IBVerbs( const IBVerbs & ); // copying prohibited @@ -123,22 +143,16 @@ class _LPFLIB_LOCAL IBVerbs void doProgress(); void tryIncrement(Op op, Phase phase, SlotID slot); - struct MemoryRegistration { - void * addr; - size_t size; - uint32_t lkey; - uint32_t rkey; - }; - struct MemorySlot { shared_ptr< struct ibv_mr > mr; // verbs structure std::vector< MemoryRegistration > glob; // array for global registrations }; + + Communication & m_comm; int m_pid; // local process ID int m_nprocs; // number of processes std::atomic_size_t m_numMsgs; - //std::atomic_size_t m_sendTotalInitMsgCount; std::atomic_size_t m_recvTotalInitMsgCount; std::atomic_size_t m_sentMsgs; std::atomic_size_t m_recvdMsgs; @@ -157,8 +171,6 @@ class _LPFLIB_LOCAL IBVerbs size_t m_cqSize; size_t m_minNrMsgs; size_t m_maxSrs; // maximum number of sends requests per QP - size_t m_postCount; - size_t m_recvCount; shared_ptr< struct ibv_context > m_device; // device handle shared_ptr< struct ibv_pd > m_pd; // protection domain @@ -173,10 +185,6 @@ class _LPFLIB_LOCAL IBVerbs // Connected queue pairs std::vector< shared_ptr > m_connectedQps; - std::vector rcvdMsgCount; - std::vector sentMsgCount; - std::vector getMsgCount; - std::vector slotActive; std::vector< struct ibv_send_wr > m_srs; // array of send requests @@ -193,8 +201,13 @@ class _LPFLIB_LOCAL IBVerbs shared_ptr< struct ibv_mr > m_dummyMemReg; // registration of dummy buffer std::vector< char > m_dummyBuffer; // dummy receive buffer - - Communication & m_comm; + // + std::vector rcvdMsgCount; + std::vector sentMsgCount; + std::vector getMsgCount; + std::vector slotActive; + size_t m_postCount; + size_t m_recvCount; }; diff --git a/src/MPI/ibverbsNoc.cpp b/src/MPI/ibverbsNoc.cpp new file mode 100644 index 00000000..7f185fc1 --- /dev/null +++ b/src/MPI/ibverbsNoc.cpp @@ -0,0 +1,98 @@ +#include "ibverbsNoc.hpp" + +namespace lpf +{ +namespace mpi +{ + + size_t MemoryRegistration :: serialize(char ** buf) { + std::stringstream ss; + size_t bufSize = sizeof(uintptr_t) + sizeof(size_t) + 2*sizeof(uint32_t) + sizeof(int); + *buf = new char[bufSize]; + char *ptr = *buf; + uintptr_t addrAsUintPtr = reinterpret_cast(_addr); + memcpy(ptr, &addrAsUintPtr, sizeof(uintptr_t)); + ptr += sizeof(uintptr_t); + memcpy(ptr, &_size, sizeof(size_t)); + ptr += sizeof(size_t); + memcpy(ptr, &_lkey, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + memcpy(ptr, &_rkey, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + memcpy(ptr, &_pid, sizeof(int)); + return bufSize; + } + + MemoryRegistration * MemoryRegistration :: deserialize(char * buf) { + + char * addr; + size_t size; + uint32_t lkey; + uint32_t rkey; + uintptr_t addrAsUintPtr; + int pid; + char * ptr = buf; + memcpy(&addrAsUintPtr, ptr, sizeof(uintptr_t)); + addr = reinterpret_cast(addrAsUintPtr); + ptr += sizeof(uintptr_t); + memcpy(&size, ptr, sizeof(size_t)); + ptr += sizeof(size_t); + memcpy(&lkey, ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + memcpy(&rkey, ptr, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + memcpy(&pid, ptr, sizeof(int)); + return new MemoryRegistration(addr, size, lkey, rkey, pid); + } + + struct IBVerbsNoc::Exception : std::runtime_error { + Exception(const char * what) : std::runtime_error( what ) {} + }; + + MemoryRegistration IBVerbsNoc :: getMR(SlotID slotId, int pid) + { + const MemorySlot & slot = m_memreg.lookup( slotId ); + return slot.glob[pid]; + } + + void IBVerbsNoc::setMR(SlotID slotId, int pid, MemoryRegistration & mr) + { + m_memreg.update(slotId).glob[pid] = mr; + } + + IBVerbsNoc::IBVerbsNoc(Communication & comm) : IBVerbs(comm) + { + } + + IBVerbs::SlotID IBVerbsNoc :: regNoc( void * addr, size_t size ) + { + ASSERT( size <= m_maxRegSize ); + + MemorySlot slot; + if ( size > 0) { + LOG(4, "IBVerbsNoc::regLocal: Registering locally memory area at " << addr << " of size " << size ); + struct ibv_mr * const ibv_mr_new_p = ibv_reg_mr( + m_pd.get(), addr, size, + IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE + ); + if( ibv_mr_new_p == NULL ) + slot.mr.reset(); + else + slot.mr.reset( ibv_mr_new_p, ibv_dereg_mr ); + if (!slot.mr) { + LOG(1, "Could not register memory area at " + << addr << " of size " << size << " with IB device"); + throw Exception("Could not register memory area"); + } + } + MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid); + + SlotID id = m_memreg.addNocReg( slot ); + m_memreg.update( id ).glob.resize( m_nprocs ); + m_memreg.update( id ).glob[m_pid] = local; + LOG(4, "Memory area " << addr << " of size " << size << " has been locally registered as NOC slot. Slot = " << id ); + return id; + } + +} // namespace mpi +} // namespace lpf diff --git a/src/MPI/ibverbsNoc.hpp b/src/MPI/ibverbsNoc.hpp new file mode 100644 index 00000000..d9ece946 --- /dev/null +++ b/src/MPI/ibverbsNoc.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "ibverbs.hpp" + +namespace lpf +{ + +namespace mpi +{ + class _LPFLIB_LOCAL IBVerbsNoc : public IBVerbs { + public: + IBVerbsNoc(Communication & comm); + IBVerbs::SlotID regNoc( void * addr, size_t size ); + MemoryRegistration getMR(SlotID slotId, int pid); + void setMR(SlotID slotId, int pid, MemoryRegistration & mr); + + }; +} // namespace mpi +} // namespace lpf diff --git a/src/MPI/ibverbsZero.cpp b/src/MPI/ibverbsZero.cpp index 6f52fa5b..7cec923a 100644 --- a/src/MPI/ibverbsZero.cpp +++ b/src/MPI/ibverbsZero.cpp @@ -53,14 +53,20 @@ namespace { IBVerbs :: IBVerbs( Communication & comm ) - : m_pid( comm.pid() ) + : m_comm( comm ) + , m_pid( comm.pid() ) , m_nprocs( comm.nprocs() ) + , m_numMsgs(0) + , m_recvTotalInitMsgCount(0) + , m_sentMsgs(0) + , m_recvdMsgs(0) , m_devName() , m_ibPort( Config::instance().getIBPort() ) , m_gidIdx( Config::instance().getIBGidIndex() ) , m_mtu( getMTU( Config::instance().getIBMTU() )) , m_maxRegSize(0) , m_maxMsgSize(0) + , m_cqSize(1) , m_minNrMsgs(0) , m_maxSrs(0) , m_device() @@ -78,14 +84,8 @@ IBVerbs :: IBVerbs( Communication & comm ) , m_memreg() , m_dummyMemReg() , m_dummyBuffer() - , m_comm( comm ) - , m_cqSize(1) , m_postCount(0) , m_recvCount(0) - , m_numMsgs(0) - , m_recvTotalInitMsgCount(0) - , m_sentMsgs(0) - , m_recvdMsgs(0) { // arrays instead of hashmap for counters @@ -260,7 +260,7 @@ IBVerbs :: ~IBVerbs() inline void IBVerbs :: tryIncrement(Op op, Phase phase, SlotID slot) { - + switch (phase) { case Phase::INIT: rcvdMsgCount[slot] = 0; @@ -306,7 +306,7 @@ inline void IBVerbs :: tryIncrement(Op op, Phase phase, SlotID slot) { void IBVerbs :: stageQPs( size_t maxMsgs ) { // create the queue pairs - for ( int i = 0; i < m_nprocs; ++i) { + for ( size_t i = 0; i < static_cast(m_nprocs); ++i) { struct ibv_qp_init_attr attr; std::memset(&attr, 0, sizeof(attr)); @@ -321,6 +321,7 @@ void IBVerbs :: stageQPs( size_t maxMsgs ) attr.cap.max_recv_sge = 1; struct ibv_qp * const ibv_new_qp_p = ibv_create_qp( m_pd.get(), &attr ); + ASSERT(m_stagedQps.size() > i); if( ibv_new_qp_p == NULL ) { m_stagedQps[i].reset(); } else { @@ -352,7 +353,7 @@ void IBVerbs :: doRemoteProgress() { pollResult = ibv_poll_cq(m_cqRemote.get(), POLL_BATCH, wcs); if (pollResult > 0) { LOG(3, "Process " << m_pid << " signals: I received " << pollResult << " remote messages in doRemoteProgress"); - } + } else if (pollResult < 0) { LOG( 1, "Failed to poll IB completion queue" ); @@ -367,10 +368,10 @@ void IBVerbs :: doRemoteProgress() { << wcs[i].vendor_err ); } else { - LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].src_qp = "<< wcs[i].src_qp); - LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].slid = "<< wcs[i].slid); - LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].wr_id = "<< wcs[i].wr_id); - LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].imm_data = "<< wcs[i].imm_data); + LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].src_qp = "<< wcs[i].src_qp); + LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].slid = "<< wcs[i].slid); + LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].wr_id = "<< wcs[i].wr_id); + LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].imm_data = "<< wcs[i].imm_data); /** * Here is a trick: @@ -463,7 +464,6 @@ void IBVerbs :: reconnectQPs() struct ibv_recv_wr rr; std::memset(&rr, 0, sizeof(rr)); struct ibv_sge sge; std::memset(&sge, 0, sizeof(sge)); - struct ibv_recv_wr *bad_wr = NULL; sge.addr = reinterpret_cast(m_dummyBuffer.data()); sge.length = m_dummyBuffer.size(); sge.lkey = m_dummyMemReg->lkey; @@ -553,8 +553,8 @@ void IBVerbs :: resizeMemreg( size_t size ) throw std::bad_alloc() ; } - MemoryRegistration null = { 0, 0, 0, 0 }; - MemorySlot dflt; dflt.glob.resize( m_nprocs, null ); + MemoryRegistration newMR = { nullptr, 0, 0, 0, m_pid}; + MemorySlot dflt; dflt.glob.resize( m_nprocs, newMR); m_memreg.reserve( size, dflt ); } @@ -616,14 +616,10 @@ IBVerbs :: SlotID IBVerbs :: regLocal( void * addr, size_t size ) throw Exception("Could not register memory area"); } } - MemoryRegistration local; - local.addr = addr; - local.size = size; - local.lkey = size?slot.mr->lkey:0; - local.rkey = size?slot.mr->rkey:0; + MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid); SlotID id = m_memreg.addLocalReg( slot ); - tryIncrement(Op::SEND/* <- dummy for init */, Phase::INIT, id); + tryIncrement(Op::SEND, Phase::INIT, id); m_memreg.update( id ).glob.resize( m_nprocs ); m_memreg.update( id ).glob[m_pid] = local; @@ -662,12 +658,7 @@ IBVerbs :: SlotID IBVerbs :: regGlobal( void * addr, size_t size ) // exchange memory registration info globally ref.glob.resize(m_nprocs); - MemoryRegistration local; - local.addr = addr; - local.size = size; - local.lkey = size?slot.mr->lkey:0; - local.rkey = size?slot.mr->rkey:0; - + MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid); LOG(4, "All-gathering memory register data" ); m_comm.allgather( local, ref.glob.data() ); @@ -694,9 +685,9 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst const MemorySlot & dst = m_memreg.lookup( dstSlot); char * localAddr - = static_cast(src.glob[m_pid].addr) + srcOffset; + = static_cast(src.glob[m_pid]._addr) + srcOffset; const char * remoteAddr - = static_cast(dst.glob[dstPid].addr) + dstOffset; + = static_cast(dst.glob[dstPid]._addr) + dstOffset; struct ibv_sge sge; memset(&sge, 0, sizeof(sge)); @@ -704,7 +695,6 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst sge.length = std::min(size, m_maxMsgSize ); sge.lkey = src.mr->lkey; - struct ibv_wc wcs[POLL_BATCH]; struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); wr.wr_id = srcSlot; @@ -716,7 +706,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst wr.wr.atomic.remote_addr = reinterpret_cast(remoteAddr); wr.wr.atomic.compare_add = compare_add; wr.wr.atomic.swap = swap; - wr.wr.atomic.rkey = dst.glob[dstPid].rkey; + wr.wr.atomic.rkey = dst.glob[dstPid]._rkey; struct ibv_send_wr *bad_wr; int error; std::vector opcodes; @@ -729,7 +719,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst } /** - * Keep waiting on a completion of events until you + * Keep waiting on a completion of events until you * register a completed atomic compare-and-swap */ do { @@ -741,7 +731,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst } while (std::find(opcodes.begin(), opcodes.end(), IBV_WC_COMP_SWAP) == opcodes.end()); uint64_t * remoteValueFound = reinterpret_cast(localAddr); - /* + /* * if we fetched the value we expected, then * we are holding the lock now (that is, we swapped successfully!) * else, re-post your request for the lock @@ -775,9 +765,9 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset, sge = &sges[i]; std::memset(sge, 0, sizeof(ibv_sge)); sr = &srs[i]; std::memset(sr, 0, sizeof(ibv_send_wr)); const char * localAddr - = static_cast(src.glob[m_pid].addr) + srcOffset; + = static_cast(src.glob[m_pid]._addr) + srcOffset; const char * remoteAddr - = static_cast(dst.glob[dstPid].addr) + dstOffset; + = static_cast(dst.glob[dstPid]._addr) + dstOffset; sge->addr = reinterpret_cast( localAddr ); sge->length = std::min(size, m_maxMsgSize ); @@ -791,9 +781,9 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset, sr->send_flags = lastMsg ? IBV_SEND_SIGNALED : 0; sr->opcode = lastMsg? IBV_WR_RDMA_WRITE_WITH_IMM : IBV_WR_RDMA_WRITE; /* use wr_id to later demultiplex srcSlot */ - sr->wr_id = srcSlot; + sr->wr_id = srcSlot; /* - * In HiCR, we need to know at receiver end which slot + * In HiCR, we need to know at receiver end which slot * has received the message. But here is a trick: */ sr->imm_data = dstSlot; @@ -801,7 +791,7 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset, sr->sg_list = &sges[i]; sr->num_sge = 1; sr->wr.rdma.remote_addr = reinterpret_cast( remoteAddr ); - sr->wr.rdma.rkey = dst.glob[dstPid].rkey; + sr->wr.rdma.rkey = dst.glob[dstPid]._rkey; srs[i] = *sr; size -= sge->length; @@ -843,9 +833,9 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset, sr = &srs[i]; std::memset(sr, 0, sizeof(ibv_send_wr)); const char * localAddr - = static_cast(dst.glob[m_pid].addr) + dstOffset; + = static_cast(dst.glob[m_pid]._addr) + dstOffset; const char * remoteAddr - = static_cast(src.glob[srcPid].addr) + srcOffset; + = static_cast(src.glob[srcPid]._addr) + srcOffset; sge->addr = reinterpret_cast( localAddr ); sge->length = std::min(size, m_maxMsgSize ); @@ -861,7 +851,7 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset, sr->num_sge = 1; sr->opcode = IBV_WR_RDMA_READ; sr->wr.rdma.remote_addr = reinterpret_cast( remoteAddr ); - sr->wr.rdma.rkey = src.glob[srcPid].rkey; + sr->wr.rdma.rkey = src.glob[srcPid]._rkey; // This logic is reversed compared to ::put // (not srcSlot, as this slot is remote) sr->wr_id = dstSlot; // <= DO NOT CHANGE THIS !!! @@ -890,25 +880,29 @@ void IBVerbs :: get_rcvd_msg_count(size_t * rcvd_msgs) { *rcvd_msgs = m_recvdMsgs; } +void IBVerbs :: get_sent_msg_count(size_t * sent_msgs) { + *sent_msgs = m_sentMsgs; +} + void IBVerbs :: get_rcvd_msg_count_per_slot(size_t * rcvd_msgs, SlotID slot) { - *rcvd_msgs = rcvdMsgCount[slot]; + *rcvd_msgs = rcvdMsgCount[slot] + getMsgCount[slot]; } void IBVerbs :: get_sent_msg_count_per_slot(size_t * sent_msgs, SlotID slot) { - *sent_msgs = sentMsgCount.at(slot); + *sent_msgs = sentMsgCount[slot]; } std::vector IBVerbs :: wait_completion(int& error) { error = 0; - LOG(5, "Polling for messages" ); + LOG(1, "Polling for messages" ); struct ibv_wc wcs[POLL_BATCH]; int pollResult = ibv_poll_cq(m_cqLocal.get(), POLL_BATCH, wcs); std::vector opcodes; if ( pollResult > 0) { - LOG(3, "Process " << m_pid << ": Received " << pollResult << " acknowledgements"); + LOG(4, "Process " << m_pid << ": Received " << pollResult << " acknowledgements"); for (int i = 0; i < pollResult ; ++i) { if (wcs[i].status != IBV_WC_SUCCESS) @@ -923,10 +917,10 @@ std::vector IBVerbs :: wait_completion(int& error) { error = 1; } else { - LOG(3, "Process " << m_pid << " Send wcs[" << i << "].src_qp = "<< wcs[i].src_qp); - LOG(3, "Process " << m_pid << " Send wcs[" << i << "].slid = "<< wcs[i].slid); - LOG(3, "Process " << m_pid << " Send wcs[" << i << "].wr_id = "<< wcs[i].wr_id); - LOG(3, "Process " << m_pid << " Send wcs[" << i << "].imm_data = "<< wcs[i].imm_data); + LOG(4, "Process " << m_pid << " Send wcs[" << i << "].src_qp = "<< wcs[i].src_qp); + LOG(4, "Process " << m_pid << " Send wcs[" << i << "].slid = "<< wcs[i].slid); + LOG(4, "Process " << m_pid << " Send wcs[" << i << "].wr_id = "<< wcs[i].wr_id); + LOG(4, "Process " << m_pid << " Send wcs[" << i << "].imm_data = "<< wcs[i].imm_data); } SlotID slot = wcs[i].wr_id; @@ -936,18 +930,20 @@ std::vector IBVerbs :: wait_completion(int& error) { // This is a get call completing if (wcs[i].opcode == IBV_WC_RDMA_READ) { tryIncrement(Op::GET, Phase::POST, slot); + LOG(4, "Rank " << m_pid << " with GET, increments getMsgCount to " << getMsgCount[slot] << " for LPF slot " << slot); } // This is a put call completing - if (wcs[i].opcode == IBV_WC_RDMA_WRITE) + if (wcs[i].opcode == IBV_WC_RDMA_WRITE) { tryIncrement(Op::SEND, Phase::POST, slot); + LOG(4, "Rank " << m_pid << " with SEND, increments getMsgCount to " << sentMsgCount[slot] << " for LPF slot " << slot); + } - LOG(3, "Rank " << m_pid << " increments sent message count to " << sentMsgCount[slot] << " for LPF slot " << slot); } } } else if (pollResult < 0) { - LOG( 5, "Failed to poll IB completion queue" ); + LOG( 1, "Failed to poll IB completion queue" ); throw Exception("Poll CQ failure"); } return opcodes; @@ -980,10 +976,12 @@ void IBVerbs :: flushSent() } -void IBVerbs :: countingSyncPerSlot(bool resized, SlotID slot, size_t expectedSent, size_t expectedRecvd) { +void IBVerbs :: countingSyncPerSlot(SlotID slot, size_t expectedSent, size_t expectedRecvd) { - size_t actualRecvd; - size_t actualSent; + bool sentOK = false; + bool recvdOK = false; + if (expectedSent == 0) sentOK = true; + if (expectedRecvd == 0) recvdOK = true; int error; if (slotActive[slot]) { do { @@ -995,14 +993,25 @@ void IBVerbs :: countingSyncPerSlot(bool resized, SlotID slot, size_t expectedSe // this call triggers doRemoteProgress doRemoteProgress(); - } while ( - (rcvdMsgCount[slot] < m_recvInitMsgCount[slot]) || - (sentMsgCount[slot] < m_sendInitMsgCount[slot]) - ); + /* + * 1) Are we expecting nothing here (sentOK/recvdOK = true) + * 2) do the sent and received messages match our expectations? + */ + sentOK = (sentOK || sentMsgCount[slot] >= expectedSent); + // We can receive messages passively (from remote puts) and actively (from our gets) + recvdOK = (recvdOK || (rcvdMsgCount[slot] + getMsgCount[slot]) >= expectedRecvd); + LOG(4, "PID: " << m_pid << " rcvdMsgCount[" << slot << "] = " << rcvdMsgCount[slot] + << " expectedRecvd = " << expectedRecvd + << " sentMsgCount[" << slot << "] = " << sentMsgCount[slot] + << " expectedSent = " << expectedSent + << " m_recvInitMsgCount[" << slot << "] = " << m_recvInitMsgCount[slot] + << " m_sendInitMsgCount[" << slot << "] = " << m_sendInitMsgCount[slot]); + + } while (!(sentOK && recvdOK)); } } -void IBVerbs :: syncPerSlot(bool resized, SlotID slot) { +void IBVerbs :: syncPerSlot(SlotID slot) { int error; do { @@ -1034,15 +1043,14 @@ void IBVerbs :: syncPerSlot(bool resized, SlotID slot) { void IBVerbs :: sync(bool resized) { - - int error = 0; + (void) resized; // flush send queues flushSent(); // flush receive queues flushReceived(); - LOG(1, "Process " << m_pid << " will call barrier\n"); + LOG(4, "Process " << m_pid << " will call barrier at end of sync\n"); m_comm.barrier(); diff --git a/src/MPI/interface.cpp b/src/MPI/interface.cpp index 80123e58..53203042 100644 --- a/src/MPI/interface.cpp +++ b/src/MPI/interface.cpp @@ -75,7 +75,10 @@ void Interface :: initRoot(int *argc, char ***argv) Interface :: Interface( mpi::Comm machine, Process & subprocess ) try : m_comm( machine ) , m_subprocess( subprocess ) - , m_mesgQueue( m_comm ) + +//#if defined (LPF_CORE_MPI_USES_zero) || defined (LPF_CORE_MPI_USES_ibverbs) + ,m_mesgQueue( m_comm) +//#endif , m_aborted( false ) { if ( machine.allreduceOr( false ) ) @@ -129,6 +132,15 @@ void Interface :: getSentMsgCountPerSlot(size_t * msgs, SlotID slot) { m_mesgQueue.getSentMsgCountPerSlot(msgs, slot); } + +void Interface :: getRcvdMsgCount(size_t * msgs) { + m_mesgQueue.getRcvdMsgCount(msgs); +} + +void Interface :: getSentMsgCount(size_t * msgs) { + m_mesgQueue.getSentMsgCount(msgs); +} + void Interface :: flushSent() { m_mesgQueue.flushSent(); } @@ -137,10 +149,6 @@ void Interface :: flushReceived() { m_mesgQueue.flushReceived(); } -void Interface :: getRcvdMsgCount(size_t * msgs) { - m_mesgQueue.getRcvdMsgCount(msgs); -} - err_t Interface :: countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd) { if ( 0 == m_aborted ) @@ -219,6 +227,51 @@ void Interface :: abort() #endif } +/* start NOC extensions */ +memslot_t Interface :: nocRegister( void * mem, size_t size ) +{ + return m_mesgQueue.addNocReg( mem, size ); +} + +void Interface :: nocDeregister( memslot_t slot) +{ + m_mesgQueue.removeReg(slot); +} + +err_t Interface :: nocResizeMemreg( size_t nRegs ) +{ + return m_mesgQueue.resizeMemreg(nRegs); +} + +void Interface :: nocPut( memslot_t srcSlot, size_t srcOffset, + pid_t dstPid, memslot_t dstSlot, size_t dstOffset, + size_t size ) +{ + m_mesgQueue.put( srcSlot, srcOffset, + dstPid, dstSlot, dstOffset, + size ); +} + +void Interface :: nocGet( pid_t srcPid, memslot_t srcSlot, size_t srcOffset, + memslot_t dstSlot, size_t dstOffset, + size_t size ) +{ + m_mesgQueue.get( srcPid, srcSlot, srcOffset, + dstSlot, dstOffset, + size ); +} + +err_t Interface :: serializeSlot(SlotID slot, char ** buff, size_t *buff_size) +{ + return m_mesgQueue.serializeSlot(slot, buff, buff_size); +} + +err_t Interface :: deserializeSlot(char * buff, SlotID slot) +{ + return m_mesgQueue.deserializeSlot(buff, slot); +} +/* end NOC extensions */ + pid_t Interface :: isAborted() const { return m_aborted; diff --git a/src/MPI/interface.hpp b/src/MPI/interface.hpp index 02e48b3c..004e9edc 100644 --- a/src/MPI/interface.hpp +++ b/src/MPI/interface.hpp @@ -61,6 +61,23 @@ class _LPFLIB_LOCAL Interface err_t resizeMesgQueue( size_t nMsgs ) ; // nothrow void abort() ; // nothrow + // + /* start NOC extensions */ + memslot_t nocRegister( void * mem, size_t size ) ; // nothrow + void nocDeregister( memslot_t slot) ; // nothrow + err_t nocResizeMemreg( size_t nRegs ) ; // nothrow + void nocPut( memslot_t srcSlot, size_t srcOffset, + pid_t dstPid, memslot_t dstSlot, size_t dstOffset, + size_t size ) ; // nothrow + + void nocGet( pid_t srcPid, memslot_t srcSlot, size_t srcOffset, + memslot_t dstSlot, size_t dstOffset, + size_t size ) ;// nothrow + + err_t serializeSlot(memslot_t slot, char ** buff, size_t *buff_size); + + err_t deserializeSlot(char * buff, memslot_t slot); + /* end NOC extensions */ pid_t isAborted() const ; @@ -82,6 +99,8 @@ class _LPFLIB_LOCAL Interface void getSentMsgCountPerSlot(size_t * msgs, SlotID slot); + void getSentMsgCount(size_t * msgs); + void getRcvdMsgCount(size_t * msgs); void flushSent(); @@ -108,6 +127,7 @@ class _LPFLIB_LOCAL Interface mpi::Comm m_comm; Process & m_subprocess; MessageQueue m_mesgQueue; + pid_t m_aborted; static Interface * s_root; diff --git a/src/MPI/memorytable.cpp b/src/MPI/memorytable.cpp index 51947985..4ebb546d 100644 --- a/src/MPI/memorytable.cpp +++ b/src/MPI/memorytable.cpp @@ -23,8 +23,10 @@ namespace lpf { MemoryTable :: MemoryTable( Communication & comm -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +#if defined LPF_CORE_MPI_USES_ibverbs , mpi::IBVerbs & ibverbs +#elif defined LPF_CORE_MPI_USES_zero + , mpi::IBVerbsNoc & ibverbs #endif ) : m_memreg() @@ -42,6 +44,17 @@ MemoryTable :: MemoryTable( Communication & comm { (void) comm; } +MemoryTable :: Slot +MemoryTable :: addNoc( void * mem, std::size_t size ) // nothrow +{ +#if defined LPF_CORE_MPI_USES_zero + Memory rec( mem, size, m_ibverbs.regNoc(mem, size)); + return m_memreg.addNocReg( rec); +#else + return m_memreg.invalidSlot(); +#endif +} + MemoryTable :: Slot MemoryTable :: addLocal( void * mem, std::size_t size ) // nothrow { @@ -53,6 +66,15 @@ MemoryTable :: addLocal( void * mem, std::size_t size ) // nothrow return m_memreg.addLocalReg( rec); } +#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +mpi::IBVerbs::SlotID MemoryTable :: getVerbID(MemoryTable::Slot slot) const +{ + Memory sl = m_memreg.lookup(slot); + ASSERT(sl.slot != m_memreg.invalidSlot()); + return m_memreg.lookup( slot ).slot; +} +#endif + MemoryTable :: Slot MemoryTable :: addGlobal( void * mem, std::size_t size ) // nothrow { diff --git a/src/MPI/memorytable.hpp b/src/MPI/memorytable.hpp index 05c01eee..1308aa33 100644 --- a/src/MPI/memorytable.hpp +++ b/src/MPI/memorytable.hpp @@ -23,8 +23,10 @@ #include "assert.hpp" #include "linkage.hpp" -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +#if defined LPF_CORE_MPI_USES_ibverbs #include "ibverbs.hpp" +#elif defined LPF_CORE_MPI_USES_zero +#include "ibverbsNoc.hpp" #endif @@ -64,14 +66,18 @@ class _LPFLIB_LOCAL MemoryTable static Slot invalidSlot() { return Register::invalidSlot(); } -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +#if defined LPF_CORE_MPI_USES_ibverbs explicit MemoryTable( Communication & comm, mpi::IBVerbs & verbs ); +#elif defined LPF_CORE_MPI_USES_zero + explicit MemoryTable( Communication & comm, mpi::IBVerbsNoc & verbs ); #else explicit MemoryTable( Communication & comm ); #endif Slot addLocal( void * mem, std::size_t size ) ; // nothrow + Slot addNoc( void * mem, std::size_t size ) ; // nothrow + Slot addGlobal( void * mem, std::size_t size ); // nothrow void remove( Slot slot ); // nothrow @@ -90,8 +96,7 @@ class _LPFLIB_LOCAL MemoryTable #endif #if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero - mpi::IBVerbs::SlotID getVerbID( Slot slot ) const - { return m_memreg.lookup( slot ).slot; } + mpi::IBVerbs::SlotID getVerbID( Slot slot ) const; #endif void reserve( size_t size ); // throws bad_alloc, strong safe @@ -117,9 +122,13 @@ class _LPFLIB_LOCAL MemoryTable Communication & m_comm; #endif -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +#if defined LPF_CORE_MPI_USES_ibverbs + DirtyList m_added; + mpi::IBVerbs & m_ibverbs; + Communication & m_comm; +#elif defined LPF_CORE_MPI_USES_zero DirtyList m_added; - mpi::IBVerbs & m_ibverbs; + mpi::IBVerbsNoc & m_ibverbs; Communication & m_comm; #endif }; diff --git a/src/MPI/mesgqueue.cpp b/src/MPI/mesgqueue.cpp index f81a618a..0c0f05f2 100644 --- a/src/MPI/mesgqueue.cpp +++ b/src/MPI/mesgqueue.cpp @@ -16,6 +16,7 @@ */ #include "mesgqueue.hpp" +#include "ibverbs.hpp" #include "mpilib.hpp" #include "log.hpp" #include "assert.hpp" @@ -103,13 +104,13 @@ MessageQueue :: MessageQueue( Communication & comm ) , m_bodySends() , m_bodyRecvs() , m_comm( dynamic_cast(comm) ) + , m_tinyMsgBuf( m_tinyMsgSize + largestHeader(m_nprocs, m_memRange, 0, 0)) #if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero - , m_ibverbs( m_comm ) + , m_ibverbs(m_comm) , m_memreg( m_comm, m_ibverbs ) #else , m_memreg( m_comm ) #endif - , m_tinyMsgBuf( m_tinyMsgSize + largestHeader(m_nprocs, m_memRange, 0, 0)) { m_memreg.reserve(1); // reserve slot for edgeBuffer } @@ -243,6 +244,48 @@ err_t MessageQueue :: resizeMemreg( size_t nRegs ) return LPF_SUCCESS; } + +memslot_t MessageQueue :: addNocReg( void * mem, std::size_t size) +{ + memslot_t slot = m_memreg.addNoc( mem, size ); + ASSERT(slot != LPF_INVALID_MEMSLOT); + if (size > 0) + m_msgsort.addRegister( slot, static_cast( mem ), size); + return slot; +} + +err_t MessageQueue :: serializeSlot(memslot_t slot, char ** mem, std::size_t * size) +{ + ASSERT(slot != LPF_INVALID_MEMSLOT); + ASSERT(mem != nullptr); + ASSERT(size != nullptr); +#ifdef LPF_CORE_MPI_USES_zero + auto mr = m_ibverbs.getMR(m_memreg.getVerbID(slot), m_pid); + *size = mr.serialize(mem); + return LPF_SUCCESS; +#else + LOG( 3, "Error: serialize slot is only implemented for zero engine at the moment."); + return LPF_ERR_FATAL; +#endif + +} + +err_t MessageQueue :: deserializeSlot(char * mem, memslot_t slot) +{ + ASSERT(mem != nullptr); + ASSERT(slot != LPF_INVALID_MEMSLOT); +#ifdef LPF_CORE_MPI_USES_zero + auto mr = mpi::MemoryRegistration::deserialize(mem); + m_ibverbs.setMR(m_memreg.getVerbID(slot), mr->_pid, *mr); + return LPF_SUCCESS; +#else + LOG( 3, "Error: deserialize slot is only implemented for zero engine at the moment."); + return LPF_ERR_FATAL; +#endif + +} + + memslot_t MessageQueue :: addLocalReg( void * mem, std::size_t size) { memslot_t slot = m_memreg.addLocal( mem, size ); @@ -324,6 +367,12 @@ void MessageQueue :: get( pid_t srcPid, memslot_t srcSlot, size_t srcOffset, void MessageQueue :: lockSlot( memslot_t srcSlot, size_t srcOffset, pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size ) { + ASSERT(srcSlot != LPF_INVALID_MEMSLOT); + ASSERT(dstSlot != LPF_INVALID_MEMSLOT); + (void) srcOffset; + (void) dstOffset; + (void) dstPid; + (void) size; #ifdef LPF_CORE_MPI_USES_zero m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid, m_memreg.getVerbID(dstSlot), dstOffset, size, 0ULL, 1ULL); #endif @@ -332,6 +381,12 @@ m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid, void MessageQueue :: unlockSlot( memslot_t srcSlot, size_t srcOffset, pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size ) { + ASSERT(srcSlot != LPF_INVALID_MEMSLOT); + ASSERT(dstSlot != LPF_INVALID_MEMSLOT); + (void) srcOffset; + (void) dstOffset; + (void) dstPid; + (void) size; #ifdef LPF_CORE_MPI_USES_zero m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid, m_memreg.getVerbID(dstSlot), dstOffset, size, 1ULL, 0ULL); #endif @@ -389,6 +444,7 @@ int MessageQueue :: sync( bool abort ) { #ifdef LPF_CORE_MPI_USES_zero // if not, deal with normal sync + (void) abort; m_memreg.sync(); m_ibverbs.sync(m_resized); m_resized = false; @@ -1018,32 +1074,33 @@ int MessageQueue :: sync( bool abort ) } -int MessageQueue :: countingSyncPerSlot(SlotID slot, size_t expected_sent, size_t expected_rcvd) +int MessageQueue :: countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd) { + ASSERT(slot != LPF_INVALID_MEMSLOT); + (void) expected_sent; + (void) expected_rcvd; #ifdef LPF_CORE_MPI_USES_zero // if not, deal with normal sync m_memreg.sync(); - - m_ibverbs.countingSyncPerSlot(m_resized, slot, expected_sent, expected_rcvd); - + m_ibverbs.countingSyncPerSlot(m_memreg.getVerbID(slot), expected_sent, expected_rcvd); m_resized = false; + #endif return 0; } -int MessageQueue :: syncPerSlot(SlotID slot) +int MessageQueue :: syncPerSlot(memslot_t slot) { + ASSERT(slot != LPF_INVALID_MEMSLOT); #ifdef LPF_CORE_MPI_USES_zero // if not, deal with normal sync m_memreg.sync(); - - m_ibverbs.syncPerSlot(m_resized, slot); - + m_ibverbs.syncPerSlot(m_memreg.getVerbID(slot)); m_resized = false; #endif @@ -1051,28 +1108,41 @@ int MessageQueue :: syncPerSlot(SlotID slot) } -void MessageQueue :: getRcvdMsgCountPerSlot(size_t * msgs, SlotID slot) +void MessageQueue :: getRcvdMsgCountPerSlot(size_t * msgs, memslot_t slot) { + ASSERT(msgs != nullptr); + ASSERT(slot != LPF_INVALID_MEMSLOT); #ifdef LPF_CORE_MPI_USES_zero *msgs = 0; - m_ibverbs.get_rcvd_msg_count_per_slot(msgs, slot); + m_ibverbs.get_rcvd_msg_count_per_slot(msgs, m_memreg.getVerbID(slot)); #endif } void MessageQueue :: getRcvdMsgCount(size_t * msgs) { + ASSERT(msgs != nullptr); #ifdef LPF_CORE_MPI_USES_zero *msgs = 0; - m_ibverbs.get_rcvd_msg_count(msgs); + m_ibverbs.get_rcvd_msg_count(msgs); #endif } -void MessageQueue :: getSentMsgCountPerSlot(size_t * msgs, SlotID slot) +void MessageQueue :: getSentMsgCount(size_t * msgs) +{ + ASSERT(msgs != nullptr); +#ifdef LPF_CORE_MPI_USES_zero + *msgs = 0; + m_ibverbs.get_sent_msg_count(msgs); +#endif +} +void MessageQueue :: getSentMsgCountPerSlot(size_t * msgs, memslot_t slot) { + ASSERT(msgs != nullptr); + ASSERT(slot != LPF_INVALID_MEMSLOT); #ifdef LPF_CORE_MPI_USES_zero *msgs = 0; - m_ibverbs.get_sent_msg_count_per_slot(msgs, slot); + m_ibverbs.get_sent_msg_count_per_slot(msgs, m_memreg.getVerbID(slot)); #endif } diff --git a/src/MPI/mesgqueue.hpp b/src/MPI/mesgqueue.hpp index b4f1f796..198afa04 100644 --- a/src/MPI/mesgqueue.hpp +++ b/src/MPI/mesgqueue.hpp @@ -26,6 +26,9 @@ #include "messagesort.hpp" #include "mpilib.hpp" #include "linkage.hpp" +#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero +#include "ibverbsNoc.hpp" +#endif #if __cplusplus >= 201103L #include @@ -33,12 +36,7 @@ #include #endif -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero -#include "ibverbs.hpp" -#endif - //only for HiCR -typedef size_t SlotID; namespace lpf { @@ -53,7 +51,9 @@ class _LPFLIB_LOCAL MessageQueue memslot_t addLocalReg( void * mem, std::size_t size ); + memslot_t addGlobalReg( void * mem, std::size_t size ); + void removeReg( memslot_t slot ); void get( pid_t srcPid, memslot_t srcSlot, size_t srcOffset, @@ -67,31 +67,36 @@ class _LPFLIB_LOCAL MessageQueue int sync( bool abort ); //only for HiCR -//#ifdef void lockSlot( memslot_t srcSlot, size_t srcOffset, pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size ); void unlockSlot( memslot_t srcSlot, size_t srcOffset, pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size ); - void getRcvdMsgCountPerSlot(size_t * msgs, SlotID slot); + void getRcvdMsgCountPerSlot(size_t * msgs, memslot_t slot); void getRcvdMsgCount(size_t * msgs); - void getSentMsgCountPerSlot(size_t * msgs, SlotID slot); + void getSentMsgCountPerSlot(size_t * msgs, memslot_t slot); + + void getSentMsgCount(size_t * msgs); void flushSent(); void flushReceived(); - int countingSyncPerSlot(SlotID slot, size_t expected_sent, size_t expected_rcvd); + int countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd); + + int syncPerSlot(memslot_t slot); + // NOC extensions + memslot_t addNocReg( void * mem, std::size_t size ); - int syncPerSlot(SlotID slot); + err_t serializeSlot(memslot_t slot, char ** buff, std::size_t * buff_size); + err_t deserializeSlot(char * buff, memslot_t slot); // end only for HiCR -//#endif private: - enum Msgs { BufPut , + enum Msgs { BufPut , BufGet, BufGetReply, HpPut, HpGet , HpBodyReply , HpEdges, HpEdgesReply }; @@ -100,7 +105,7 @@ class _LPFLIB_LOCAL MessageQueue SrcPid, DstPid, SrcOffset, DstOffset, BufOffset, SrcSlot, DstSlot, Size, - RoundedDstOffset, RoundedSize, + RoundedDstOffset, RoundedSize, Payload, Head, Tail}; struct Edge { @@ -160,11 +165,14 @@ class _LPFLIB_LOCAL MessageQueue std::vector< Body > m_bodySends; std::vector< Body > m_bodyRecvs; mpi::Comm m_comm; -#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero + std::vector< char > m_tinyMsgBuf; +protected: +#if defined LPF_CORE_MPI_USES_ibverbs mpi::IBVerbs m_ibverbs; +#elif defined LPF_CORE_MPI_USES_zero + mpi::IBVerbsNoc m_ibverbs; #endif MemoryTable m_memreg; - std::vector< char > m_tinyMsgBuf; }; diff --git a/src/common/memreg.hpp b/src/common/memreg.hpp index f48d519c..9e6c4b87 100644 --- a/src/common/memreg.hpp +++ b/src/common/memreg.hpp @@ -211,6 +211,7 @@ class CombinedMemoryRegister void destroy() { m_local.destroy(); m_global.destroy(); + m_noc.destroy(); } Slot addLocalReg( Record record ) // nothrow @@ -218,6 +219,12 @@ class CombinedMemoryRegister return toLocal( m_local.add( record ) ); } + Slot addNocReg( Record record ) // nothrow + { + Slot a = toNoc( m_noc.add( record ) ); + return a; + } + Slot addGlobalReg( Record record ) // nothrow { return toGlobal( m_global.add(record) ); @@ -227,24 +234,31 @@ class CombinedMemoryRegister { if (isLocalSlot(slot)) m_local.remove( fromLocal(slot) ) ; - else + else if (isGlobalSlot(slot)) m_global.remove( fromGlobal( slot ) ); + else + m_noc.remove(fromNoc( slot ) ); } const Record & lookup( Slot slot ) const // nothrow { if (isLocalSlot(slot)) return m_local.lookup( fromLocal(slot)); - else + else if (isGlobalSlot(slot)) return m_global.lookup( fromGlobal( slot )); + else {// isNocSlot(slot) == true + return m_noc.lookup( fromNoc( slot )); + } } Record & update( Slot slot ) // nothrow { if (isLocalSlot(slot)) return m_local.update( fromLocal(slot)); - else + else if (isGlobalSlot(slot)) return m_global.update( fromGlobal( slot )); + else // noc Slot + return m_noc.update(fromNoc(slot)); } void reserve( size_t size, const Record & defaultRecord = Record() ) @@ -252,36 +266,50 @@ class CombinedMemoryRegister { m_global.reserve( size, defaultRecord ); m_local.reserve( size, defaultRecord ); + m_noc.reserve( size, defaultRecord ); } size_t capacity( ) const { - return std::min( m_global.capacity(), m_local.capacity() ); + return std::min(std::min( m_global.capacity(), m_local.capacity()), m_noc.capacity() ); } size_t range() const { - return std::max( 2*m_global.capacity(), 2*m_local.capacity()+1 ); + return std::max(std::max( 3*m_global.capacity(), 3*m_local.capacity()+1), 3*m_noc.capacity()+2); } static bool isLocalSlot( Slot slot ) - { return slot % 2 == 1; } + { return slot % 3 == 1; } + + static bool isGlobalSlot( Slot slot ) + { return slot % 3 == 0; } + + static bool isNocSlot( Slot slot ) + { return slot % 3 == 2; } private: static Slot fromGlobal( Slot slot ) - { return slot / 2; } + { return slot / 3; } static Slot fromLocal( Slot slot ) - { return (slot - 1) / 2; } + { return (slot - 1) / 3; } + + static Slot fromNoc( Slot slot ) + { return (slot - 2) / 3; } static Slot toGlobal( Slot slot ) - { return 2*slot; } + { return 3*slot; } static Slot toLocal( Slot slot ) - { return 2*slot + 1; } + { return 3*slot + 1; } + + static Slot toNoc( Slot slot ) + { return 3*slot + 2; } MemoryRegister m_local; MemoryRegister m_global; + MemoryRegister m_noc; }; } // namespace lpf diff --git a/src/debug/CMakeLists.txt b/src/debug/CMakeLists.txt index 28af2937..7f3f9c92 100644 --- a/src/debug/CMakeLists.txt +++ b/src/debug/CMakeLists.txt @@ -25,8 +25,8 @@ add_library( ${libname} rwconflict.cpp $ ) -target_link_libraries( ${libname} ${LIB_POSIX_THREADS}) -target_include_directories( ${libname} PRIVATE ${MPI_C_INCLUDE_PATH}) +target_link_libraries(${libname} ${LIB_POSIX_THREADS}) +target_include_directories(${libname} PRIVATE ${MPI_C_INCLUDE_PATH}) set_target_properties(${libname} PROPERTIES SOVERSION ${SOVERSION} ) @@ -35,6 +35,7 @@ install(TARGETS ${libname} EXPORT lpf RUNTIME DESTINATION ${INSTALL_BIN} LIBRARY DESTINATION ${INSTALL_LIB} ARCHIVE DESTINATION ${INSTALL_LIB} - ) +) -add_gtest(rwconflict_test "pthread" ON rwconflict.t.cpp rwconflict.cpp) +add_gtest(rwconflict_test "pthread" rwconflict.t.cpp rwconflict.cpp) + #$ ) diff --git a/src/debug/core.cpp b/src/debug/core.cpp index 00f025f6..c3d0adec 100644 --- a/src/debug/core.cpp +++ b/src/debug/core.cpp @@ -16,6 +16,7 @@ */ #include "debug/lpf/core.h" +#include "lpf/abort.h" #undef lpf_get #undef lpf_put @@ -29,12 +30,6 @@ #undef lpf_exec #undef lpf_hook #undef lpf_rehook -#undef lpf_abort -#undef lpf_get_rcvd_msg_count -#undef lpf_get_rcvd_msg_count_per_slot -#undef lpf_get_sent_msg_count_per_slot -#undef lpf_flush -#undef lpf_abort #undef lpf_init_t #undef lpf_pid_t @@ -62,6 +57,7 @@ #undef LPF_NONE #undef LPF_INIT_NONE #undef LPF_NO_ARGS +#undef LPF_HAS_ABORT #if __cplusplus >= 201103L #include @@ -105,9 +101,21 @@ class _LPFLIB_LOCAL Interface { } static void threadInit() { + // in the below we use std::abort as these are critical *internal* + // errors, not errors in the use of LPF core functionality. + // By contrast, errors that appear due to misuse of the LPF core primitives + // should call lpf_abort. This initialiser ensures that the underlying LPF + // engine has support for lpf_abort. + // The above logic about when to std::abort and when to lpf_abort is applied + // consistently in the below implementation. Only (seemingly) exceptions will + // be documented henceforth. int rc = pthread_key_create( &s_threadKeyCtxStore, &destroyCtxStore ); if (rc) { - LOG( 0, "Internal error while initializing thread static storage"); + LOG( 0, "Internal error while initializing thread static storage" ); + std::abort(); + } + if( ! LPF_HAS_ABORT ) { + LOG( 0, "Debug layer relies on lpf_abort, but selected engine does not support it" ); std::abort(); } } @@ -491,6 +499,13 @@ class _LPFLIB_LOCAL Interface { static lpf_err_t hook( const char * file, int line, lpf_init_t init, lpf_spmd_t spmd, lpf_args_t args ) { + // the lpf_hook could arise from any non-LPF context -- this is in fact + // why it exists: hooking from within an LPF context to create a subcontext is + // provided by lpf_rehook instead. + // Because the callee context is potentially not controlled by the underlying + // LPF engine, and because the callee context in the non-trivial case consists + // of multiple distributed processes, we cannot rely on lpf_abort. The only + // thing we can do is rely on the standard abort. if ( spmd == NULL ) { LOG( 0, file << ":" << line << ": Invalid argument passed to lpf_hook: NULL spmd argument" ); @@ -703,18 +718,6 @@ class _LPFLIB_LOCAL Interface { return LPF_SUCCESS; } - lpf_err_t get_rcvd_msg_count_per_slot(size_t *msgs, lpf_memslot_t slot) { - return LPF_SUCCESS; - } - - lpf_err_t get_sent_msg_count_per_slot(size_t *msgs, lpf_memslot_t slot) { - return LPF_SUCCESS; - } - - lpf_err_t get_rcvd_msg_count(size_t *msgs) { - return LPF_SUCCESS; - } - lpf_err_t register_local( const char * file, int line, void * pointer, size_t size, lpf_memslot_t * memslot ) { @@ -1023,7 +1026,6 @@ class _LPFLIB_LOCAL Interface { return LPF_SUCCESS; } - lpf_err_t abort(const char * file, int line) { (void) file; (void) line; diff --git a/src/hybrid/CMakeLists.txt b/src/hybrid/CMakeLists.txt index c2a87b14..ea1a3885 100644 --- a/src/hybrid/CMakeLists.txt +++ b/src/hybrid/CMakeLists.txt @@ -20,8 +20,10 @@ if (HYBRID_ENGINE_ENABLED) set(LPF_IMPL_ID hybrid) set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME}) -set(LPFLIB_HYBRID_MPI_ENGINE "ibverbs" CACHE STRING - "Choice of MPI engine to use for inter-process communication") +if( NOT DEFINED LPFLIB_HYBRID_MPI_ENGINE ) + message( FATAL_ERROR "Hybrid engine is enabled but no inter-node engine was selected" ) +endif() + set(mpi_engine "${LPFLIB_HYBRID_MPI_ENGINE}" ) message( STATUS "Hybrid implementation's multi-node layer is '${mpi_engine}'") diff --git a/src/hybrid/core.cpp b/src/hybrid/core.cpp index 39226a18..16b738d6 100644 --- a/src/hybrid/core.cpp +++ b/src/hybrid/core.cpp @@ -37,6 +37,10 @@ extern "C" { +// the value 2 in this implementation indicates support for lpf_abort in a way +// that may deviate from the stdlib abort() +_LPFLIB_VAR const int LPF_HAS_ABORT = 2; + _LPFLIB_VAR const lpf_err_t LPF_SUCCESS = 0; _LPFLIB_VAR const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1; _LPFLIB_VAR const lpf_err_t LPF_ERR_FATAL = 2; @@ -414,11 +418,15 @@ _LPFLIB_API lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t * rcvd_msgs) _LPFLIB_API lpf_err_t lpf_get_rcvd_msg_count_per_slot( lpf_t ctx, size_t * rcvd_msgs, lpf_memslot_t slot ) { + using namespace lpf::hybrid; + if (ctx == LPF_SINGLE_PROCESS) + return LPF_SUCCESS; ThreadState * t = realContext(ctx); - MPI mpi = t->nodeState().mpi(); - mpi.abort(); - return LPF_SUCCESS; + if (!t->error()) + return t->getRcvdMsgCountPerSlot(rcvd_msgs, slot); + else + return LPF_SUCCESS; } _LPFLIB_API lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_msgs, lpf_memslot_t slot ) @@ -428,7 +436,19 @@ _LPFLIB_API lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_ return LPF_SUCCESS; ThreadState * t = realContext(ctx); if (!t->error()) - return t->getSentMsgCount(sent_msgs, slot); + return t->getSentMsgCountPerSlot(sent_msgs, slot); + else + return LPF_SUCCESS; +} + +_LPFLIB_API lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t * sent_msgs) +{ + using namespace lpf::hybrid; + if (ctx == LPF_SINGLE_PROCESS) + return LPF_SUCCESS; + ThreadState * t = realContext(ctx); + if (!t->error()) + return t->getSentMsgCount(sent_msgs); else return LPF_SUCCESS; } diff --git a/src/hybrid/dispatch.hpp b/src/hybrid/dispatch.hpp index 2dc83c2b..e9f6b6b6 100644 --- a/src/hybrid/dispatch.hpp +++ b/src/hybrid/dispatch.hpp @@ -19,23 +19,29 @@ #define LPF_CORE_HYBRID_DISPATCH_HPP #undef LPFLIB_CORE_H +#undef LPFLIB_ABORT_H #define LPF_CORE_STATIC_DISPATCH #define LPF_CORE_STATIC_DISPATCH_ID pthread #define LPF_CORE_STATIC_DISPATCH_CONFIG LPF_CORE_IMPL_CONFIG #include +#include #undef LPF_CORE_STATIC_DISPATCH_ID #undef LPF_CORE_STATIC_DISPATCH_CONFIG #undef LPFLIB_CORE_H +#undef LPFLIB_ABORT_H #define LPF_CORE_STATIC_DISPATCH_ID LPF_CORE_MULTI_NODE_ENGINE #define LPF_CORE_STATIC_DISPATCH_CONFIG LPF_CORE_IMPL_CONFIG #include +#include #undef LPF_CORE_STATIC_DISPATCH_ID #undef LPF_CORE_STATIC_DISPATCH_CONFIG #undef LPFLIB_CORE_H +#undef LPFLIB_ABORT_H #undef LPF_CORE_STATIC_DISPATCH #include +#include #define USE_THREAD( symbol ) \ LPF_RENAME_PRIMITIVE4( lpf, pthread, LPF_CORE_IMPL_CONFIG, symbol ) @@ -121,6 +127,9 @@ namespace lpf { namespace hybrid { err_t get_rcvd_msg_count( size_t * rcvd_msgs) { return USE_THREAD( get_rcvd_msg_count)(m_ctx, rcvd_msgs); } + err_t get_sent_msg_count( size_t * sent_msgs) + { return USE_THREAD( get_sent_msg_count)(m_ctx, sent_msgs); } + err_t flush_sent() { return USE_THREAD(flush_sent)(m_ctx); } @@ -223,15 +232,18 @@ namespace lpf { namespace hybrid { err_t deregister( memslot_t memslot) { return USE_MPI( deregister)(m_ctx, memslot); } + err_t get_rcvd_msg_count( size_t * rcvd_msgs) + { return USE_MPI( get_rcvd_msg_count)(m_ctx, rcvd_msgs); } + err_t get_rcvd_msg_count_per_slot(size_t *rcvd_msgs, lpf_memslot_t slot) { return USE_MPI( get_rcvd_msg_count_per_slot)( m_ctx, rcvd_msgs, slot); } + err_t get_sent_msg_count( size_t * sent_msgs) + { return USE_MPI( get_sent_msg_count)(m_ctx, sent_msgs); } + err_t get_sent_msg_count_per_slot(size_t *sent_msgs, lpf_memslot_t slot) { return USE_MPI( get_sent_msg_count_per_slot)( m_ctx, sent_msgs, slot); } - err_t get_rcvd_msg_count( size_t * rcvd_msgs) - { return USE_MPI( get_rcvd_msg_count)(m_ctx, rcvd_msgs); } - err_t flush_sent() {return USE_MPI( flush_sent)(m_ctx);} diff --git a/src/hybrid/state.hpp b/src/hybrid/state.hpp index 06e8faf3..f890be6b 100644 --- a/src/hybrid/state.hpp +++ b/src/hybrid/state.hpp @@ -111,13 +111,6 @@ class _LPFLIB_LOCAL NodeState { return m_mpi.sync(); } -// MPI::err_t counting_sync_per_slot(lpf_memslot_t slot, size_t expected_sent, size_t expected_rcvd) -// { -// m_memreg.flush( m_mpi ); -// m_msgQueue.flush( m_mpi, m_memreg ); -// return m_mpi.counting_sync_per_slot(slot, expected_sent, expected_rcvd); -// } - static double messageGap( lpf_pid_t nprocs, size_t minMsgSize, lpf_sync_attr_t attr) { (void) nprocs; @@ -422,14 +415,19 @@ class _LPFLIB_LOCAL ThreadState { bool error() const { return m_error; } - lpf_pid_t getRcvdMsgCount(size_t * rcvd_msgs, lpf_memslot_t slot) { + lpf_pid_t getRcvdMsgCountPerSlot(size_t * rcvd_msgs, lpf_memslot_t slot) { return m_nodeState.mpi().get_rcvd_msg_count_per_slot(rcvd_msgs, slot); } - lpf_pid_t getSentMsgCount(size_t * sent_msgs, lpf_memslot_t slot) { + lpf_pid_t getSentMsgCountPerSlot(size_t * rcvd_msgs, lpf_memslot_t slot) { + + return m_nodeState.mpi().get_sent_msg_count_per_slot(rcvd_msgs, slot); + } + + lpf_pid_t getSentMsgCount(size_t * sent_msgs) { - return m_nodeState.mpi().get_sent_msg_count_per_slot(sent_msgs, slot); + return m_nodeState.mpi().get_sent_msg_count(sent_msgs); } lpf_pid_t getRcvdMsgCount(size_t * rcvd_msgs) { diff --git a/src/imp/core.c b/src/imp/core.c index 7b4c3db2..994a18fd 100644 --- a/src/imp/core.c +++ b/src/imp/core.c @@ -16,12 +16,15 @@ */ #include +#include #include #include #include #include +const int LPF_HAS_ABORT = 0; + const lpf_err_t LPF_SUCCESS = 0; const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1; const lpf_err_t LPF_ERR_FATAL = 2; @@ -141,6 +144,9 @@ lpf_err_t lpf_counting_sync_per_slot( lpf_t lpf, lpf_sync_attr_t attr, lpf_memsl { (void) lpf; (void) attr; + (void) slot; + (void) expected_sent; + (void) expected_rcvd; return LPF_SUCCESS; } @@ -154,6 +160,15 @@ lpf_err_t lpf_lock_slot( size_t size, lpf_msg_attr_t attr ) { + + (void) ctx; + (void) src_slot; + (void) src_offset; + (void) dst_pid; + (void) dst_slot; + (void) dst_offset; + (void) size; + (void) attr; return LPF_SUCCESS; } @@ -167,6 +182,14 @@ lpf_err_t lpf_unlock_slot( size_t size, lpf_msg_attr_t attr ) { + (void) ctx; + (void) src_slot; + (void) src_offset; + (void) dst_pid; + (void) dst_slot; + (void) dst_offset; + (void) size; + (void) attr; return LPF_SUCCESS; } @@ -215,17 +238,26 @@ lpf_err_t lpf_resize_memory_register( lpf_t lpf, size_t max_regs ) lpf_err_t lpf_get_rcvd_msg_count_per_slot( lpf_t lpf, size_t * rcvd_msgs, lpf_memslot_t slot) { (void) lpf; *rcvd_msgs = 0; + (void) slot; return LPF_SUCCESS; } lpf_err_t lpf_get_rcvd_msg_count( lpf_t lpf, size_t * rcvd_msgs) { (void) lpf; + *rcvd_msgs = 0; + return LPF_SUCCESS; +} + +lpf_err_t lpf_get_sent_msg_count( lpf_t lpf, size_t * sent_msgs) { + (void) lpf; + *sent_msgs = 0; return LPF_SUCCESS; } lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t lpf, size_t * sent_msgs, lpf_memslot_t slot) { (void) lpf; *sent_msgs = 0; + (void) slot; return LPF_SUCCESS; } @@ -239,3 +271,79 @@ lpf_err_t lpf_abort( lpf_t lpf) (void) lpf; return LPF_SUCCESS; } + +lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs ) +{ + (void) ctx; + (void) max_regs; + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_register( + lpf_t ctx, + void * pointer, + size_t size, + lpf_memslot_t * memslot +) +{ + (void) ctx; + (void) pointer; + (void) size; + (void) memslot; + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_deregister( + lpf_t ctx, + lpf_memslot_t memslot +) +{ + (void) ctx; + (void) memslot; + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_put( + lpf_t ctx, + lpf_memslot_t src_slot, + size_t src_offset, + lpf_pid_t dst_pid, + lpf_memslot_t dst_slot, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +) +{ + (void) ctx; + (void) src_slot; + (void) src_offset; + (void) dst_pid; + (void) dst_slot; + (void) dst_offset; + (void) size; + (void) attr; + return LPF_SUCCESS; +} + +lpf_err_t lpf_noc_get( + lpf_t ctx, + lpf_pid_t src_pid, + lpf_memslot_t src_slot, + size_t src_offset, + lpf_memslot_t dst_slot, + size_t dst_offset, + size_t size, + lpf_msg_attr_t attr +) +{ + (void) ctx; + (void) src_pid; + (void) src_slot; + (void) src_offset; + (void) dst_slot; + (void) dst_offset; + (void) size; + (void) attr; + + return LPF_SUCCESS; +} diff --git a/src/pthreads/barrier.cpp b/src/pthreads/barrier.cpp index cacfbbf8..92442474 100644 --- a/src/pthreads/barrier.cpp +++ b/src/pthreads/barrier.cpp @@ -82,9 +82,9 @@ namespace { { #ifdef VALGRIND_MEMCHECK #ifdef LPF_ON_MACOS - pthread_yield_np(); + sched_yield_np(); #else - pthread_yield(); // allow other processes to progress + sched_yield(); // allow other processes to progress #endif #endif } @@ -145,7 +145,7 @@ namespace { if (m_available ) _mm_mwait(0, 0); else - pthread_yield(); + sched_yield(); } bool m_available; @@ -160,9 +160,9 @@ namespace { void pause() { #ifdef LPF_ON_MACOS - pthread_yield_np(); + sched_yield_np(); #else - if (pthread_yield()) { + if (sched_yield()) { LOG(2, "While waiting, the Posix thread library failed to " "yield the CPU to the OS" ); } diff --git a/src/pthreads/core.cpp b/src/pthreads/core.cpp index 763d9a44..38799e9a 100644 --- a/src/pthreads/core.cpp +++ b/src/pthreads/core.cpp @@ -17,6 +17,7 @@ #include #include +#include #include "threadlocaldata.hpp" #include "machineparams.hpp" @@ -37,6 +38,10 @@ #include // for pthreads +// the value 2 in this implementation indicates support for lpf_abort in a way +// that may deviate from the stdlib abort() +const int LPF_HAS_ABORT = 2; + const lpf_err_t LPF_SUCCESS = 0; const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1; const lpf_err_t LPF_ERR_FATAL = 2; @@ -386,6 +391,7 @@ lpf_err_t lpf_resize_memory_register( lpf_t ctx, size_t max_regs ) } lpf_err_t lpf_get_rcvd_msg_count_per_slot(lpf_t ctx, size_t * msgs, lpf_memslot_t slot) { + (void) slot; *msgs = 0; lpf::ThreadLocalData * t = realCtx(ctx); if (t->isAborted()) @@ -403,6 +409,15 @@ lpf_err_t lpf_get_rcvd_msg_count(lpf_t ctx, size_t * msgs) { } lpf_err_t lpf_get_sent_msg_count_per_slot(lpf_t ctx, size_t * msgs, lpf_memslot_t slot) { + *msgs = 0; + (void) slot; + lpf::ThreadLocalData * t = realCtx(ctx); + if (t->isAborted()) + return LPF_SUCCESS; + return LPF_SUCCESS; +} + +lpf_err_t lpf_get_sent_msg_count(lpf_t ctx, size_t * msgs) { *msgs = 0; lpf::ThreadLocalData * t = realCtx(ctx); if (t->isAborted()) diff --git a/src/pthreads/threadlocaldata.cpp b/src/pthreads/threadlocaldata.cpp index 6a62e4d3..1923b272 100644 --- a/src/pthreads/threadlocaldata.cpp +++ b/src/pthreads/threadlocaldata.cpp @@ -442,6 +442,10 @@ err_t ThreadLocalData :: sync( bool expectExit) } err_t ThreadLocalData :: countingSyncPerSlot(bool expectExit, lpf_memslot_t slot, size_t expected_sent, size_t expected_rcvd) { + (void) expectExit; + (void) slot; + (void) expected_sent; + (void) expected_rcvd; return LPF_SUCCESS; } diff --git a/test_launcher.py.in b/test_launcher.py.in new file mode 100644 index 00000000..656e570f --- /dev/null +++ b/test_launcher.py.in @@ -0,0 +1,38 @@ +import argparse +import subprocess +import sys + +parser = argparse.ArgumentParser( description='Death test launcher' ) +parser.add_argument("-e", "--engine", type=str) +parser.add_argument("-L", "--parallel_launcher", type=str) +parser.add_argument("-p", "--min_process_count", type=int) +parser.add_argument("-P", "--max_process_count", type=int) +parser.add_argument("-t", "--lpf_probe_timer", type=float) +parser.add_argument("-R", "--expected_return_code", type=int) +parser.add_argument( 'cmd', nargs=argparse.REMAINDER ) +args = parser.parse_args() + +# This is only for passing Gtest info to CMake +# The parallel launcher is still needed as Open MPI +# binaries terminate without the launcher on our cluster, +# even for single process runs +if args.cmd[-1] == '--gtest_list_tests': + run_cmd = [args.parallel_launcher, '-engine', args.engine, '-n', '1'] + args.cmd + cmd = subprocess.run( run_cmd) + sys.exit(cmd.returncode) +# Actual use of our launcher +else: + for i in range(args.min_process_count, args.max_process_count+1): + if args.lpf_probe_timer > 0.0: + run_cmd = [args.parallel_launcher, '-engine', args.engine, '-probe', str(args.lpf_probe_timer), '-n', str(i)] + args.cmd + else: + run_cmd = [args.parallel_launcher, '-engine', args.engine, '-n', str(i)] + args.cmd + print("Run command: ") + print(run_cmd) + cmd = subprocess.run( run_cmd) + print("Test returned code = " + str(cmd.returncode)) + retcode = cmd.returncode + if (retcode != args.expected_return_code): + print("Test " + args.cmd[0] + args.cmd[1] + "\nreturned\t" + str(retcode) + "\nexpected return code was: " + str(args.expected_return_code)) + sys.exit(1) + print("Test " + args.cmd[0] + args.cmd[1] + " passed") diff --git a/tests/functional/CMakeLists.txt b/tests/functional/CMakeLists.txt index 11e4baf1..04ebf85d 100644 --- a/tests/functional/CMakeLists.txt +++ b/tests/functional/CMakeLists.txt @@ -77,26 +77,20 @@ set(test_sources func_lpf_exec_single_call_single_arg_single_proc.cpp func_lpf_get_parallel_alltoall.cpp func_lpf_get_parallel_huge.cpp - func_lpf_get_parallel_single.cpp - #func_lpf_hook_simple.mpirma.cpp - #func_lpf_hook_simple.pthread.cpp - #func_lpf_hook_subset.mpimsg.cpp - #func_lpf_hook_tcp.mpirma.cpp - #func_lpf_hook_tcp_timeout.mpirma.cpp - #func_lpf_put_parallel_bad_pattern.cpp <= in exception_list - func_lpf_put_and_get_overlapping.cpp func_lpf_get_parallel_overlapping_complete.cpp - func_lpf_put_parallel_overlapping_complete.cpp func_lpf_get_parallel_overlapping_pyramid.cpp - func_lpf_put_parallel_overlapping_pyramid.cpp func_lpf_get_parallel_overlapping_rooftiling.cpp - func_lpf_put_parallel_overlapping_rooftiling.cpp + func_lpf_get_parallel_single.cpp func_lpf_probe_parallel_full.cpp func_lpf_probe_parallel_nested.cpp func_lpf_probe_root.cpp + func_lpf_put_and_get_overlapping.cpp func_lpf_put_parallel_alltoall.cpp func_lpf_put_parallel_big.cpp func_lpf_put_parallel_huge.cpp + func_lpf_put_parallel_overlapping_complete.cpp + func_lpf_put_parallel_overlapping_pyramid.cpp + func_lpf_put_parallel_overlapping_rooftiling.cpp func_lpf_put_parallel_single.cpp func_lpf_register_and_deregister_irregularly.cpp func_lpf_register_and_deregister_many_global.cpp @@ -142,21 +136,39 @@ foreach (LPF_IMPL_ID ${ENGINES}) string(REGEX MATCH "overlapping|early|bsplib" foundTest ${testSource}) if (NOT ${LPF_IMPL_ID} STREQUAL "zero") add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}") - - string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource}) - get_filename_component(baseName ${testSource} NAME_WE ) - set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") elseif ("${foundTest}" STREQUAL "") add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}") - - string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource}) - get_filename_component(baseName ${testSource} NAME_WE ) - set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") endif() endforeach(testSource) + endforeach(LPF_IMPL_ID) +# Individual test for NOC (Non-coherence) protocol, only for zero engine +# (part of HiCR project) +set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME}) +set(mode "") +set(exeName "func_lpf_test_noc_ring_zero_${LPF_IMPL_CONFIG}${mode}") +add_gtest(${exeName} "zero" OFF "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_test_noc_ring.cpp") + +# start of engine-specific tests +foreach (LPF_IMPL_ID ${ENGINES}) + if ("${LPF_IMPL_ID}" STREQUAL "pthread" OR "${LPF_IMPL_ID}" STREQUAL "mpirma") + foreach(testSource func_lpf_hook_simple.${LPF_IMPL_ID}.cpp) + string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource}) + get_filename_component(baseName ${testSource} NAME_WE ) + set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") + add_gtest(${exeName} ${LPF_IMPL_ID} ON "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}") + endforeach(testSource) + endif() + if ("${LPF_IMPL_ID}" STREQUAL "mpimsg") + add_gtest(func_lpf_hook_subset.mpimsg mpimsg ON "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_hook_subset.mpimsg.cpp") + endif() + if ("${LPF_IMPL_ID}" STREQUAL "mpirma") + add_gtest(func_lpf_hook_tcp_timeout.mpirma mpirma ON "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_hook_tcp_timeout.mpirma.cpp") + endif() +endforeach(LPF_IMPL_ID) +# end of engine-specific tests include_directories(.) add_subdirectory(debug) diff --git a/tests/functional/collectives/CMakeLists.txt b/tests/functional/collectives/CMakeLists.txt index 463b4de5..80aaa137 100644 --- a/tests/functional/collectives/CMakeLists.txt +++ b/tests/functional/collectives/CMakeLists.txt @@ -17,21 +17,21 @@ set(c99_tests_sources -func_lpf_allcombine.cpp -func_lpf_allgather.cpp -func_lpf_allgather_overlapped.cpp -func_lpf_allreduce.cpp -func_lpf_alltoall.cpp -func_lpf_broadcast.cpp -func_lpf_broadcast_prime_size_object.cpp -func_lpf_broadcast_small_prime_size_object.cpp -func_lpf_collectives_init.cpp -func_lpf_collectives_init_overflow.cpp -func_lpf_combine.cpp -func_lpf_gather.cpp -func_lpf_reduce.cpp -func_lpf_scatter.cpp -func_lpf_zero_cost.cpp + func_lpf_allcombine.cpp + func_lpf_allgather.cpp + func_lpf_allgather_overlapped.cpp + func_lpf_allreduce.cpp + func_lpf_alltoall.cpp + func_lpf_broadcast.cpp + func_lpf_broadcast_prime_size_object.cpp + func_lpf_broadcast_small_prime_size_object.cpp + func_lpf_collectives_init.cpp + func_lpf_collectives_init_overflow.cpp + func_lpf_combine.cpp + func_lpf_gather.cpp + func_lpf_reduce.cpp + func_lpf_scatter.cpp + func_lpf_zero_cost.cpp ) foreach (LPF_IMPL_ID ${ENGINES}) @@ -51,9 +51,5 @@ foreach (LPF_IMPL_ID ${ENGINES}) add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}") - string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource}) - get_filename_component(baseName ${testSource} NAME_WE ) - set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") - endforeach(testSource) endforeach(LPF_IMPL_ID) diff --git a/tests/functional/debug/CMakeLists.txt b/tests/functional/debug/CMakeLists.txt index 0292d488..67ffcb5d 100644 --- a/tests/functional/debug/CMakeLists.txt +++ b/tests/functional/debug/CMakeLists.txt @@ -37,10 +37,6 @@ set(debug_test_sources func_lpf_debug_global_deregister_order_mismatch.cpp func_lpf_debug_global_deregister_unequal.cpp func_lpf_debug_global_register_null_memreg.cpp - #func_lpf_debug_hook_null_f_symbols.pthread.cpp - #func_lpf_debug_hook_null_input.pthread.cpp - #func_lpf_debug_hook_null_output.pthread.cpp - #func_lpf_debug_hook_null_spmd.pthread.cpp func_lpf_debug_local_register_null_memreg.cpp func_lpf_debug_put_after_deregister_dest_after_sync.cpp func_lpf_debug_put_after_deregister_dest.cpp @@ -90,10 +86,18 @@ foreach (LPF_IMPL_ID ${ENGINES}) add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}" ) - string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${CMAKE_CURRENT_SOURCE_DIR}/${testSource}) - get_filename_component(baseName ${testSource} NAME_WE ) - set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}") - endforeach(testSource) endforeach(LPF_IMPL_ID) +add_gtest(func_lpf_debug_hook_f_symbols_pthread "pthread" ON + ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_f_symbols.pthread.cpp) + +add_gtest(func_lpf_debug_hook_null_input_pthread "pthread" ON + ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_input.pthread.cpp) + +add_gtest(func_lpf_debug_hook_null_output_pthread "pthread" ON + ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_output.pthread.cpp) + +add_gtest(func_lpf_debug_hook_null_spmd_pthread "pthread" ON + ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_spmd.pthread.cpp) + diff --git a/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp b/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp index 139bad91..5afa95a2 100644 --- a/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp +++ b/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp @@ -21,47 +21,22 @@ void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) { - (void) args; - int x = 3; int y = 6; - lpf_memslot_t xSlot = LPF_INVALID_MEMSLOT; - lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT; - - lpf_err_t rc = lpf_resize_memory_register( lpf, 2 ); - EXPECT_EQ(LPF_SUCCESS, rc ); - - rc = lpf_resize_message_queue( lpf, 2 ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_register_global( lpf, &x, sizeof(x), &xSlot ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_register_global( lpf, &y, sizeof(y), &ySlot ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_get( lpf, (pid+1)%nprocs, xSlot, 3, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - FAIL(); - // the write error will be detected at this sync - //rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); + (void) pid; (void) nprocs; (void) args; + lpf_memslot_t slot; + memset( &slot, 1, sizeof(slot)); // init to some weird data + lpf_deregister( lpf, slot ); } /** - * \test Testing for a lpf_get() that reads past globally registered memory bounds - * \pre P >= 2 - * \return Message: source memory .* is read past the end by 3 bytes + * \test Deregister a non-registered slot + * \pre P >= 1 + * \return Message: Invalid attempt to deregister a memory slot, because it has not been registered before * \return Exit code: 6 */ -TEST( API, func_lpf_debug_deregister_non_existing_slot ) +TEST(API, func_lpf_debug_deregister_non_existing_slot ) { lpf_err_t rc = LPF_SUCCESS; rc = lpf_exec( LPF_ROOT, LPF_MAX_P, &spmd, LPF_NO_ARGS ); - EXPECT_EQ( LPF_SUCCESS, rc ); + EXPECT_EQ(LPF_SUCCESS, rc ); } diff --git a/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp b/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp index 90da31e8..895260c9 100644 --- a/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp +++ b/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp @@ -55,7 +55,6 @@ void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) EXPECT_EQ(3, y[0] ); EXPECT_EQ(4, y[1] ); - //FAIL(); } /** diff --git a/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp index c898128d..51eb6a5e 100644 --- a/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp +++ b/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp @@ -54,17 +54,25 @@ void * pthread_spmd( void * _data ) { &init ); EXPECT_EQ( rc, LPF_SUCCESS ); - FAIL(); + + rc = lpf_hook( init, &lpf_spmd, args ); + EXPECT_EQ( rc, LPF_SUCCESS ); + + rc = lpf_pthread_finalize( init ); + EXPECT_EQ( rc, LPF_SUCCESS ); return NULL; } +// the below tests for return code 134 as this is what aborted programs return +// as an error code on modern systems + /** * \test Tests lpf_hook on pthread implementation with NULL f_symbols * \pre P <= 1 * \pre P >= 1 * \return Message: NULL f_symbols argument while f_size is non-zero - * \return Exit code: 6 + * \return Exit code: 134 */ TEST( API, func_lpf_hook_null_f_symbols ) { diff --git a/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp index 04c7c355..051f8542 100644 --- a/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp +++ b/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -32,7 +32,7 @@ void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) { (void) ctx; (void) pid; (void) nprocs; (void) args; } void * pthread_spmd( void * _data ) { - EXPECT_NE( "%p", _data, NULL ); + EXPECT_NE( _data, (void*)NULL ); const struct thread_local_data data = * ((struct thread_local_data*) _data); const int pts_rc = pthread_setspecific( pid_key, _data ); @@ -46,61 +46,62 @@ void * pthread_spmd( void * _data ) { lpf_init_t init; lpf_err_t rc = LPF_SUCCESS; - EXPECT_EQ( "%d", pts_rc, 0 ); + EXPECT_EQ( pts_rc, 0 ); rc = lpf_pthread_initialize( (lpf_pid_t)data.s, (lpf_pid_t)data.P, &init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_hook( init, &lpf_spmd, args ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_pthread_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); return NULL; } +// the below tests for return code 134 as this is what aborted programs return +// as an error code on modern systems + /** * \test Tests lpf_hook on pthread implementation with NULL input * \pre P <= 1 * \pre P >= 1 * \return Message: NULL input argument while input_size is non-zero - * \return Exit code: 6 + * \return Exit code: 134 */ -TEST( func_lpf_hook_null_input ) +TEST( API, func_lpf_hook_null_input ) { long k = 0; const long P = sysconf( _SC_NPROCESSORS_ONLN ); const int ptc_rc = pthread_key_create( &pid_key, NULL ); - EXPECT_EQ( "%d", ptc_rc, 0 ); + EXPECT_EQ( ptc_rc, 0 ); pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) ); - EXPECT_NE( "%p", threads, NULL ); + EXPECT_NE( threads, (pthread_t*)NULL ); struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) ); - EXPECT_NE( "%p", data, NULL ); + EXPECT_NE( data, (struct thread_local_data*)NULL ); for( k = 0; k < P; ++k ) { data[ k ].P = P; data[ k ].s = k; const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } for( k = 0; k < P; ++k ) { const int rval = pthread_join( threads[ k ], NULL ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } const int ptd_rc = pthread_key_delete( pid_key ); - EXPECT_EQ( "%d", ptd_rc, 0 ); - - return 0; + EXPECT_EQ( ptd_rc, 0 ); } diff --git a/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp index 02268258..eec3be9a 100644 --- a/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp +++ b/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -32,7 +32,7 @@ void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) { (void) ctx; (void) pid; (void) nprocs; (void) args; } void * pthread_spmd( void * _data ) { - EXPECT_NE( "%p", _data, NULL ); + EXPECT_NE( _data, (void*)NULL ); const struct thread_local_data data = * ((struct thread_local_data*) _data); const int pts_rc = pthread_setspecific( pid_key, _data ); @@ -46,61 +46,62 @@ void * pthread_spmd( void * _data ) { lpf_init_t init; lpf_err_t rc = LPF_SUCCESS; - EXPECT_EQ( "%d", pts_rc, 0 ); + EXPECT_EQ( pts_rc, 0 ); rc = lpf_pthread_initialize( (lpf_pid_t)data.s, (lpf_pid_t)data.P, &init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_hook( init, &lpf_spmd, args ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_pthread_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); return NULL; } +// the below tests for return code 134 as this is what aborted programs return +// as an error code on modern systems + /** * \test Tests lpf_hook on pthread implementation with NULL output * \pre P <= 1 * \pre P >= 1 * \return Message: NULL output argument while output_size is non-zero - * \return Exit code: 6 + * \return Exit code: 134 */ -TEST( func_lpf_hook_null_output ) +TEST( API, func_lpf_hook_null_output ) { long k = 0; const long P = sysconf( _SC_NPROCESSORS_ONLN ); const int ptc_rc = pthread_key_create( &pid_key, NULL ); - EXPECT_EQ( "%d", ptc_rc, 0 ); + EXPECT_EQ( ptc_rc, 0 ); pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) ); - EXPECT_NE( "%p", threads, NULL ); + EXPECT_NE( threads, (pthread_t *)NULL ); struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) ); - EXPECT_NE( "%p", data, NULL ); + EXPECT_NE( data, (struct thread_local_data*)NULL ); for( k = 0; k < P; ++k ) { data[ k ].P = P; data[ k ].s = k; const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } for( k = 0; k < P; ++k ) { const int rval = pthread_join( threads[ k ], NULL ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } const int ptd_rc = pthread_key_delete( pid_key ); - EXPECT_EQ( "%d", ptd_rc, 0 ); - - return 0; + EXPECT_EQ( ptd_rc, 0 ); } diff --git a/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp index 20209c16..00bcc0c7 100644 --- a/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp +++ b/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -30,7 +30,7 @@ struct thread_local_data { void * pthread_spmd( void * _data ) { - EXPECT_NE( "%p", _data, NULL ); + EXPECT_NE( _data, (void*)NULL ); const struct thread_local_data data = * ((struct thread_local_data*) _data); const int pts_rc = pthread_setspecific( pid_key, _data ); @@ -44,61 +44,62 @@ void * pthread_spmd( void * _data ) { lpf_init_t init; lpf_err_t rc = LPF_SUCCESS; - EXPECT_EQ( "%d", pts_rc, 0 ); + EXPECT_EQ( pts_rc, 0 ); rc = lpf_pthread_initialize( (lpf_pid_t)data.s, (lpf_pid_t)data.P, &init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_hook( init, NULL, args ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_pthread_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); return NULL; } +// the below tests for return code 134 as this is what aborted programs return +// as an error code on modern systems + /** * \test Tests lpf_hook on pthread implementation with NULL spmd * \pre P <= 1 * \pre P >= 1 * \return Message: NULL spmd argument - * \return Exit code: 6 + * \return Exit code: 134 */ -TEST( func_lpf_hook_null_spmd ) +TEST( API, func_lpf_hook_null_spmd ) { long k = 0; const long P = sysconf( _SC_NPROCESSORS_ONLN ); const int ptc_rc = pthread_key_create( &pid_key, NULL ); - EXPECT_EQ( "%d", ptc_rc, 0 ); + EXPECT_EQ( ptc_rc, 0 ); pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) ); - EXPECT_NE( "%p", threads, NULL ); + EXPECT_NE( threads, (pthread_t*)NULL ); struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) ); - EXPECT_NE( "%p", data, NULL ); + EXPECT_NE( data, (struct thread_local_data *)NULL ); for( k = 0; k < P; ++k ) { data[ k ].P = P; data[ k ].s = k; const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } for( k = 0; k < P; ++k ) { const int rval = pthread_join( threads[ k ], NULL ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } const int ptd_rc = pthread_key_delete( pid_key ); - EXPECT_EQ( "%d", ptd_rc, 0 ); - - return 0; + EXPECT_EQ( ptd_rc, 0 ); } diff --git a/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp b/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp index cb4da30e..d31498b3 100644 --- a/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp +++ b/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp @@ -75,3 +75,5 @@ TEST( API, func_lpf_debug_put_read_write_conflict_among_many ) rc = lpf_exec( LPF_ROOT, LPF_MAX_P, &spmd, LPF_NO_ARGS ); EXPECT_EQ( LPF_SUCCESS, rc ); } + + diff --git a/tests/functional/exception_list b/tests/functional/exception_list deleted file mode 100644 index c7590fc1..00000000 --- a/tests/functional/exception_list +++ /dev/null @@ -1,5 +0,0 @@ -func_lpf_put_parallel_bad_pattern_.* -func_lpf_hook_tcp_mpi..._[^_]*_mvapich2 -func_lpf_hook_tcp_mpi..._[^_]*_openmpi_gcc_64_1_10_7 -func_lpf_hook_tcp_timeout_mpi..._[^_]*_openmpi_gcc_64_1_10_7 -func_lpf_hook_tcp_mpi..._[^_]*_mpich_ge_gcc_64_3_2rc2 diff --git a/tests/functional/func_lpf_hook_simple.mpirma.cpp b/tests/functional/func_lpf_hook_simple.mpirma.cpp index 5dfa7104..81016a39 100644 --- a/tests/functional/func_lpf_hook_simple.mpirma.cpp +++ b/tests/functional/func_lpf_hook_simple.mpirma.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -28,11 +28,11 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) lpf_err_t rc = LPF_SUCCESS; rc = lpf_resize_message_queue( ctx, 2); - EXPECT_EQ( "%d", LPF_SUCCESS, rc ); + EXPECT_EQ( LPF_SUCCESS, rc ); rc = lpf_resize_memory_register( ctx, 2); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync(ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); int x = 5 - pid; int y = pid; @@ -41,21 +41,21 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT; rc = lpf_register_global( ctx, &x, sizeof(x), &xSlot ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_register_global( ctx, &y, sizeof(y), &ySlot ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync( ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_put( ctx, xSlot, 0, (pid + 1) % nprocs, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync( ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); - EXPECT_EQ( "%d", x, (int) (5 - pid) ); - EXPECT_EQ( "%d", y, (int) (5 - (pid + nprocs -1) % nprocs) ); + EXPECT_EQ( x, (int) (5 - pid) ); + EXPECT_EQ( y, (int) (5 - (pid + nprocs -1) % nprocs) ); } // disable automatic initialization. @@ -66,7 +66,7 @@ const int LPF_MPI_AUTO_INITIALIZE=0; * \pre P >= 1 * \return Exit code: 0 */ -TEST( func_lpf_hook_simple_mpi ) +TEST(API, func_lpf_hook_simple_mpi) { lpf_err_t rc = LPF_SUCCESS; MPI_Init(NULL, NULL); @@ -79,16 +79,15 @@ TEST( func_lpf_hook_simple_mpi ) lpf_init_t init; rc = lpf_mpi_initialize_with_mpicomm( MPI_COMM_WORLD, &init); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_hook( init, &spmd, LPF_NO_ARGS ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_mpi_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); MPI_Finalize(); - return 0; } diff --git a/tests/functional/func_lpf_hook_simple.pthread.cpp b/tests/functional/func_lpf_hook_simple.pthread.cpp index 3b33bdc6..6438b676 100644 --- a/tests/functional/func_lpf_hook_simple.pthread.cpp +++ b/tests/functional/func_lpf_hook_simple.pthread.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -36,18 +36,18 @@ struct thread_local_data { void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) { (void) ctx; - const struct thread_local_data * const data = pthread_getspecific( pid_key ); - - EXPECT_EQ( "%zd", (size_t)nprocs, (size_t)(data->P) ); - EXPECT_EQ( "%zd", (size_t)pid, (size_t)(data->s) ); - EXPECT_EQ( "%zd", (size_t)(args.input_size), (size_t)(sizeof( struct thread_local_data)) ); - EXPECT_EQ( "%zd", (size_t)(args.output_size), (size_t)0 ); - EXPECT_EQ( "%p", args.input, data ); - EXPECT_EQ( "%p", args.output, NULL ); + const struct thread_local_data * const data = static_cast(pthread_getspecific( pid_key )); + + EXPECT_EQ( (size_t)nprocs, (size_t)(data->P) ); + EXPECT_EQ( (size_t)pid, (size_t)(data->s) ); + EXPECT_EQ( (size_t)(args.input_size), (size_t)(sizeof( struct thread_local_data)) ); + EXPECT_EQ( (size_t)(args.output_size), (size_t)0 ); + EXPECT_EQ( args.input, data ); + EXPECT_EQ( args.output, nullptr ); } void * pthread_spmd( void * _data ) { - EXPECT_NE( "%p", _data, NULL ); + EXPECT_NE( _data, nullptr); const struct thread_local_data data = * ((struct thread_local_data*) _data); const int pts_rc = pthread_setspecific( pid_key, _data ); @@ -61,20 +61,20 @@ void * pthread_spmd( void * _data ) { lpf_init_t init; lpf_err_t rc = LPF_SUCCESS; - EXPECT_EQ( "%d", pts_rc, 0 ); + EXPECT_EQ( pts_rc, 0 ); rc = lpf_pthread_initialize( (lpf_pid_t)data.s, (lpf_pid_t)data.P, &init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_hook( init, &lpf_spmd, args ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_pthread_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); return NULL; } @@ -85,36 +85,35 @@ void * pthread_spmd( void * _data ) { * \pre P >= 1 * \return Exit code: 0 */ -TEST( func_lpf_hook_simple_pthread ) +TEST(API, func_lpf_hook_simple_pthread ) { long k = 0; const long P = sysconf( _SC_NPROCESSORS_ONLN ); const int ptc_rc = pthread_key_create( &pid_key, NULL ); - EXPECT_EQ( "%d", ptc_rc, 0 ); + EXPECT_EQ( ptc_rc, 0 ); pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) ); - EXPECT_NE( "%p", threads, NULL ); + EXPECT_NE( threads, nullptr ); struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) ); - EXPECT_NE( "%p", data, NULL ); + EXPECT_NE( data, nullptr ); for( k = 0; k < P; ++k ) { data[ k ].P = P; data[ k ].s = k; const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } for( k = 0; k < P; ++k ) { const int rval = pthread_join( threads[ k ], NULL ); - EXPECT_EQ( "%d", rval, 0 ); + EXPECT_EQ( rval, 0 ); } const int ptd_rc = pthread_key_delete( pid_key ); - EXPECT_EQ( "%d", ptd_rc, 0 ); + EXPECT_EQ( ptd_rc, 0 ); - return 0; } diff --git a/tests/functional/func_lpf_hook_subset.mpimsg.cpp b/tests/functional/func_lpf_hook_subset.mpimsg.cpp index f073e443..6693bab3 100644 --- a/tests/functional/func_lpf_hook_subset.mpimsg.cpp +++ b/tests/functional/func_lpf_hook_subset.mpimsg.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include @@ -39,10 +39,10 @@ void subset_func(MPI_Comm comm) lpf_init_t init; lpf_err_t rc = lpf_mpi_initialize_with_mpicomm(comm, &init); - EXPECT_EQ( "%d", LPF_SUCCESS, rc ); + EXPECT_EQ( LPF_SUCCESS, rc ); rc = lpf_hook(init, test_spmd, LPF_NO_ARGS); - EXPECT_EQ( "%d", LPF_SUCCESS, rc ); + EXPECT_EQ( LPF_SUCCESS, rc ); } /** @@ -50,7 +50,7 @@ void subset_func(MPI_Comm comm) * \pre P >= 3 * \return Exit code: 0 */ -TEST( func_lpf_hook_subset ) +TEST(API, func_lpf_hook_subset ) { MPI_Init(NULL, NULL); @@ -71,5 +71,5 @@ TEST( func_lpf_hook_subset ) MPI_Barrier(MPI_COMM_WORLD); // Paranoid barrier MPI_Finalize(); - return 0; + } diff --git a/tests/functional/func_lpf_hook_tcp.mpirma.cpp b/tests/functional/func_lpf_hook_tcp.mpirma.cpp index 2921e6fc..0d7f0290 100644 --- a/tests/functional/func_lpf_hook_tcp.mpirma.cpp +++ b/tests/functional/func_lpf_hook_tcp.mpirma.cpp @@ -17,28 +17,35 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include +static int myargc; +static char **myargv; + +// disable automatic initialization. +const int LPF_MPI_AUTO_INITIALIZE=0; + + void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) { lpf_err_t rc = LPF_SUCCESS; struct { int pid, nprocs; } params; - EXPECT_EQ( "%lu", sizeof(params), args.input_size ); + EXPECT_EQ( sizeof(params), args.input_size ); memcpy( ¶ms, args.input, sizeof(params)); - EXPECT_EQ( "%u", (lpf_pid_t) params.pid, pid ); - EXPECT_EQ( "%u", (lpf_pid_t) params.nprocs, nprocs ); + EXPECT_EQ( (lpf_pid_t) params.pid, pid ); + EXPECT_EQ( (lpf_pid_t) params.nprocs, nprocs ); rc = lpf_resize_message_queue( ctx, 2); - EXPECT_EQ( "%d", LPF_SUCCESS, rc ); + EXPECT_EQ( LPF_SUCCESS, rc ); rc = lpf_resize_memory_register( ctx, 2); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync(ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); int x = 5 - pid; int y = pid; @@ -47,25 +54,23 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args ) lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT; rc = lpf_register_global( ctx, &x, sizeof(x), &xSlot ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_register_global( ctx, &y, sizeof(y), &ySlot ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync( ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_put( ctx, xSlot, 0, (pid + 1) % nprocs, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_sync( ctx, LPF_SYNC_DEFAULT ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); - EXPECT_EQ( "%d", x, (int) (5 - pid) ); - EXPECT_EQ( "%d", y, (int) (5 - (pid + nprocs -1) % nprocs) ); + EXPECT_EQ( x, (int) (5 - pid) ); + EXPECT_EQ( y, (int) (5 - (pid + nprocs -1) % nprocs) ); } -// disable automatic initialization. -const int LPF_MPI_AUTO_INITIALIZE=0; /** * \test Tests lpf_hook on mpi implementation using TCP/IP to initialize. The pids and nprocs are checked for their correctness. @@ -73,15 +78,14 @@ const int LPF_MPI_AUTO_INITIALIZE=0; * \return Exit code: 0 * \note Independent processes: yes */ -TEST( func_lpf_hook_tcp ) +TEST( API, func_lpf_hook_tcp_mpirma ) { lpf_err_t rc = LPF_SUCCESS; - MPI_Init(&argc, &argv); struct { int pid, nprocs; } params = { 0, 0}; - EXPECT_GT("%d", argc, 2 ); - params.pid = atoi( argv[1] ); - params.nprocs = atoi( argv[2] ); + EXPECT_GT( myargc, 2 ); + params.pid = atoi( myargv[1] ); + params.nprocs = atoi( myargv[2] ); lpf_init_t init; rc = lpf_mpi_initialize_over_tcp( @@ -89,7 +93,7 @@ TEST( func_lpf_hook_tcp ) params.pid, params.nprocs, &init); // let e.g. Intel MPI try a few // alternative fabrics - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); lpf_args_t args; args.input = ¶ms; @@ -100,13 +104,20 @@ TEST( func_lpf_hook_tcp ) args.f_size = 0; rc = lpf_hook( init, &spmd, args ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); rc = lpf_mpi_finalize( init ); - EXPECT_EQ( "%d", rc, LPF_SUCCESS ); + EXPECT_EQ( rc, LPF_SUCCESS ); MPI_Finalize(); - return 0; +} + +int main(int argc, char **argv) { + myargc = argc; + myargv = argv; + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } diff --git a/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp b/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp index e8aba501..94d3edd6 100644 --- a/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp +++ b/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp @@ -17,7 +17,7 @@ #include #include -#include "Test.h" +#include "gtest/gtest.h" #include #include @@ -31,7 +31,7 @@ const int LPF_MPI_AUTO_INITIALIZE=0; * \pre P <= 100 * \return Exit code: 1 */ -TEST( func_lpf_hook_tcp_timeout_mpi ) +TEST(API, func_lpf_hook_tcp_timeout_mpi ) { MPI_Init(NULL, NULL); @@ -45,9 +45,8 @@ TEST( func_lpf_hook_tcp_timeout_mpi ) "localhost", "9325", 999, pid, nprocs, &init); - EXPECT_EQ( "%d", rc, LPF_ERR_FATAL ); + EXPECT_EQ( rc, LPF_ERR_FATAL ); - return 0; } diff --git a/tests/functional/func_lpf_put_parallel_bad_pattern.cpp b/tests/functional/func_lpf_put_parallel_bad_pattern.cpp deleted file mode 100644 index fe1d8f48..00000000 --- a/tests/functional/func_lpf_put_parallel_bad_pattern.cpp +++ /dev/null @@ -1,100 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include "gtest/gtest.h" - -void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args) -{ - (void) args; // ignore args parameter - - lpf_err_t rc = LPF_SUCCESS; - const unsigned n = sqrt(nprocs); - unsigned i; - unsigned * xs, *ys; - ys = (unsigned *) malloc( sizeof(ys[0]) * n); - xs = (unsigned *) malloc( sizeof(xs[0]) * n); - for (i = 0; i < n; ++i) - { - xs[i] = i; - ys[i] = 0; - } - - rc = lpf_resize_message_queue( lpf, n); - EXPECT_EQ( LPF_SUCCESS, rc ); - rc = lpf_resize_memory_register( lpf, 2 ); - EXPECT_EQ( LPF_SUCCESS, rc ); - rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - lpf_memslot_t xslot = LPF_INVALID_MEMSLOT; - lpf_memslot_t yslot = LPF_INVALID_MEMSLOT; - rc = lpf_register_local( lpf, xs, sizeof(xs[0]) * n, &xslot ); - EXPECT_EQ( LPF_SUCCESS, rc ); - rc = lpf_register_global( lpf, ys, sizeof(ys[0]) * n, &yslot ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - rc = lpf_sync( lpf, LPF_SYNC_DEFAULT); - EXPECT_EQ( LPF_SUCCESS, rc ); - - // Check that data is OK. - for (i = 0; i < n; ++i) - { - EXPECT_EQ( i, xs[i] ); - EXPECT_EQ( 0u, ys[i] ); - } - - if ( pid < n ) - { - for ( i = 0; i < n; ++ i) - { - EXPECT_LT( i*n, nprocs); - rc = lpf_put( lpf, xslot, sizeof(xs[0])*i, - i*n, yslot, sizeof(ys[0])*pid, sizeof(xs[0]), - LPF_MSG_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - } - } - - - rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); - EXPECT_EQ( LPF_SUCCESS, rc ); - - for (i = 0; i < n; ++i) - { - EXPECT_EQ( i, xs[i] ); - if ( pid % n == 0 && pid < n*n) - EXPECT_EQ( pid / n, ys[i] ); - else - EXPECT_EQ( 0, ys[i] ); - } - -} - -/** - * \test Test lpf_put by doing a pattern which bad for a sparse all-to-all - * \pre P >= 5 - * \pre P <= 5 - * \return Exit code: 0 - */ -TEST( API, func_lpf_put_parallel_bad_pattern ) -{ - lpf_err_t rc = lpf_exec( LPF_ROOT, LPF_MAX_P, spmd, LPF_NO_ARGS); - EXPECT_EQ( LPF_SUCCESS, rc ); -} diff --git a/tests/functional/func_lpf_test_noc_ring.cpp b/tests/functional/func_lpf_test_noc_ring.cpp new file mode 100644 index 00000000..1050b68e --- /dev/null +++ b/tests/functional/func_lpf_test_noc_ring.cpp @@ -0,0 +1,85 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mpi.h" +#include +#include +#include "gtest/gtest.h" + +void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args) +{ + (void) args; // ignore args parameter + + lpf_err_t rc = LPF_SUCCESS; + + char buf1[30] = {'\0'}; + char buf2[30] = {'\0'}; + + strcpy(buf1, "HELLO"); + + rc = lpf_resize_memory_register(lpf, 2); // identical to lpf_noc_resize at the moment + EXPECT_EQ( LPF_SUCCESS, rc ); + EXPECT_EQ( LPF_SUCCESS, rc ); + rc = lpf_resize_message_queue( lpf, 2); + EXPECT_EQ( LPF_SUCCESS, rc ); + + rc = lpf_sync( lpf, LPF_SYNC_DEFAULT ); + EXPECT_EQ( LPF_SUCCESS, rc ); + + lpf_memslot_t xslot = LPF_INVALID_MEMSLOT; + lpf_memslot_t yslot = LPF_INVALID_MEMSLOT; + rc = lpf_register_local( lpf, buf1, sizeof(buf1), &xslot ); + EXPECT_EQ( LPF_SUCCESS, rc ); + rc = lpf_noc_register( lpf, buf2, sizeof(buf2), &yslot ); + EXPECT_EQ( LPF_SUCCESS, rc ); + + + int left = (nprocs + pid - 1) % nprocs; + int right = ( pid + 1) % nprocs; + + char * buffer; + size_t bufferSize; + lpf_noc_serialize_slot(lpf, yslot, &buffer, &bufferSize); + char rmtBuff[bufferSize]; + + MPI_Sendrecv(buffer, bufferSize, MPI_BYTE, left, 0, rmtBuff, bufferSize, MPI_BYTE, right, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + rc = lpf_noc_deserialize_slot(lpf, rmtBuff, yslot); + EXPECT_EQ( LPF_SUCCESS, rc ); + rc = lpf_noc_put(lpf, xslot, 0, right, yslot, 0, sizeof(buf1), LPF_MSG_DEFAULT); + EXPECT_EQ( LPF_SUCCESS, rc ); + rc = lpf_sync(lpf, LPF_SYNC_DEFAULT); + EXPECT_EQ( LPF_SUCCESS, rc ); + EXPECT_EQ(std::string(buf2), std::string(buf1)); + rc = lpf_deregister(lpf, xslot); + EXPECT_EQ( LPF_SUCCESS, rc ); + rc = lpf_noc_deregister(lpf, yslot); + EXPECT_EQ( LPF_SUCCESS, rc ); + +} + +/** + * \test Testing NOC functionality + * \pre P >= 2 + * \pre P <= 2 + * \return Exit code: 0 + */ +TEST( API, func_lpfAPI_test_noc_ring ) +{ + lpf_err_t rc = lpf_exec( LPF_ROOT, LPF_MAX_P, spmd, LPF_NO_ARGS); + EXPECT_EQ( LPF_SUCCESS, rc ); +}