diff --git a/CMakeLists.txt b/CMakeLists.txt
index e0850a84..eb12c8bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,12 +52,12 @@ set(CPACK_PACKAGE_VERSION_MAJOR "${VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${VERSION_PATCH}")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
- "A high performance BSP communications library" )
+ "A high performance BSP communications library" )
set(CPACK_SOURCE_GENERATOR "TGZ" )
set(CPACK_SOURCE_IGNORE_FILES "/\\\\.git/" "/\\\\.svn/" "\\\\.swp$" "/site/" "/build/" "/pclint/" "/junit/" "/ideas/" )
set(CPACK_SOURCE_PACKAGE_FILE_NAME
- "LPF-${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}-${VERSION_PACKAGE}")
+ "LPF-${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}-${VERSION_PACKAGE}")
set(CPACK_GENERATOR "RPM")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
@@ -183,10 +183,29 @@ endif()
#enable the hybrid engine
if ( LIB_POSIX_THREADS AND LIB_MATH AND LIB_DL AND MPI_FOUND
- AND MPI_IS_THREAD_COMPAT AND MPI_IS_NOT_OPENMPI1
- AND ENABLE_IBVERBS )
- list(APPEND ENGINES "hybrid")
- set(HYBRID_ENGINE_ENABLED on)
+ AND MPI_IS_THREAD_COMPAT AND MPI_IS_NOT_OPENMPI1 )
+ if( ENABLE_IBVERBS )
+ set(LPFLIB_HYBRID_MPI_ENGINE "ibverbs" CACHE STRING
+ "Choice of MPI engine to use for inter-process communication")
+ list(APPEND ENGINES "hybrid")
+ set(HYBRID_ENGINE_ENABLED on)
+ elseif( MPI_RMA )
+ set(LPFLIB_HYBRID_MPI_ENGINE "mpirma" CACHE STRING
+ "Choice of MPI engine to use for inter-process communication")
+ list(APPEND ENGINES "hybrid")
+ set(HYBRID_ENGINE_ENABLED on)
+ elseif( LIB_MATH AND LIB_DL AND MPI_FOUND )
+ set(LPFLIB_HYBRID_MPI_ENGINE "mpimsg" CACHE STRING
+ "Choice of MPI engine to use for inter-process communication")
+ list(APPEND ENGINES "hybrid")
+ set(HYBRID_ENGINE_ENABLED on)
+ endif()
+ if( HYBRID_ENGINE_ENABLED )
+ message( "Hybrid engine will be built using the ${LPFLIB_HYBRID_MPI_ENGINE} engine" )
+ else()
+ message( "No suitable inter-node communication engine found; "
+ "hybrid engine will not be built" )
+ endif()
endif()
message( STATUS "The following engines will be built: ${ENGINES}")
@@ -209,6 +228,7 @@ endif()
# When system is not Linux, enable conditionally compiled blocks
if (APPLE)
+ message( WARNING "LPF compilation on OS X is not regularly tested" )
add_definitions(-DLPF_ON_MACOS=1)
endif()
@@ -233,8 +253,8 @@ option(LPF_ENABLE_TESTS
"Enable unit and API tests. This uses Google Testing and Mocking Framework"
OFF)
option(GTEST_AGREE_TO_LICENSE
- "Does the user agree to the GoogleTest license"
- OFF)
+ "Does the user agree to the GoogleTest license"
+ OFF)
# C++ standard -- Google tests require newer C++ standard than C++11
if (LPF_ENABLE_TESTS)
@@ -312,14 +332,43 @@ endfunction(target_compile_flags)
# Source
set(lpf_cflags)
set(lpf_lib_link_flags)
-set(lpf_exe_link_flags "-rdynamic")
+set(lpf_exe_link_flags)
+
+# Populate lpf_cflags, lpf_lib_link_flags, lpf_exe_link_flags according to
+# (enabled) engine requirements
+# - 0) PThreads engine needs nothing special
+# - 1) MPI-based engines:
+if ( LIB_MATH AND LIB_DL AND MPI_FOUND )
+ # -fPIC and -rdynamic are necessary to ensure that symbols can be
+ # looked up by dlsym which is the mechanism lpf_exec uses to broadcast the
+ # function that should be executed
+ set(rdyn_lflag "-rdynamic")
+ if (APPLE)
+ # OS X does not support -rdynamic
+ set(rdyn_lflag "")
+ endif ()
+
+ # include flags:
+ set( mpi_include_flags )
+ string( REPLACE ";" " -I" mpi_include_flags "${MPI_C_INCLUDE_PATH}" )
+ set(lpf_cflags "${lpf_cflags} -I${mpi_include_flags} -fPIC")
+
+ # linker flags:
+ set(lib_lflags "${MPI_C_LINK_FLAGS}") #Note: the core library is already linked with MPI_C_LIBRARIES.
+ string(REPLACE ";" " " lib_lflags "${lib_lflags}") # So, no need to also link executables with it.
+ set(lpf_lib_link_flags "${lpf_lib_link_flags} ${lib_lflags} ${rdyn_lflag}")
+
+ # executable linker flags:
+ set(lpf_exe_link_flags "${lpf_exe_link_flags} ${rdyn_lflag}")
+endif ()
+# ...add requirements from other engines here...
# Collating all compile & link flags
set(LPF_CORE_COMPILE_FLAGS "${lpf_cflags}" CACHE STRING "Compilation flags for all user code" )
set(LPF_CORE_LIB_LINK_FLAGS "${lpf_lib_link_flags}" CACHE STRING "Flags to link user libraries" )
set(LPF_CORE_EXE_LINK_FLAGS "${lpf_exe_link_flags}" CACHE STRING "Flags to link user executables" )
-# Compiling LPF programmes in the build dir
+# Compiling LPF programs in the build dir
function( target_link_exe_with_core target )
set(engine "imp")
if (ARGV1)
@@ -343,10 +392,12 @@ if (LPF_ENABLE_TESTS)
message(STATUS "Unit and API tests will be built. This requires CMake version 3.29 or higher, since we use recent features of the GoogleTest package in CMake.")
if (NOT GTEST_AGREE_TO_LICENSE)
- message(FATAL_ERROR "The user needs to agree with the GoogleTest license to use tests (option GTEST_AGREE_TO_LICENSE=TRUE)")
+ message(FATAL_ERROR "The user needs to agree with the GoogleTest license to use tests (option GTEST_AGREE_TO_LICENSE=TRUE)")
endif()
# Enable testing in CMake
enable_testing()
+ include(ProcessorCount)
+ ProcessorCount(processorCount)
find_package(GTest)
include(GoogleTest)
if(NOT GTest_FOUND) # if not found, download it and pull it in
@@ -367,64 +418,84 @@ if (LPF_ENABLE_TESTS)
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/junit)
set(test_output "${CMAKE_BINARY_DIR}/junit")
- set(MY_TEST_LAUNCHER ${CMAKE_BINARY_DIR}/test_launcher.py)
- configure_file( ${CMAKE_SOURCE_DIR}/test_launcher.py ${MY_TEST_LAUNCHER} @ONLY FILE_PERMISSIONS WORLD_EXECUTE OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ)
- if( NOT Python3_FOUND )
- find_package( Python3 REQUIRED)
- endif()
+ find_package( Python3 REQUIRED COMPONENTS Interpreter)
+ set(MY_TEST_LAUNCHER ${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/test_launcher.py)
+ configure_file( ${CMAKE_SOURCE_DIR}/test_launcher.py.in ${CMAKE_BINARY_DIR}/test_launcher.py @ONLY FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ)
# Macro for adding a new GoogleTest test
function(add_gtest testName ENGINE debug testSource )
- if ("{$ENGINE}" STREQUAL "")
- message(FATAL_ERROR "engine cannot be empty, ever!")
- endif()
- add_executable(${testName} ${testSource} ${ARGN})
- target_compile_definitions(${testName} PUBLIC LPF_CORE_IMPL_ID=${ENGINE})
- target_compile_definitions(${testName} PUBLIC LPF_CORE_MPI_USES_${ENGINE})
- if (debug)
- target_include_directories( ${testName} BEFORE PRIVATE ${CMAKE_SOURCE_DIR}/include/debug )
- target_link_libraries(${testName} lpf_debug lpf_hl_debug GTest::gtest GTest::gtest_main)
- else(debug)
- target_link_libraries(${testName} GTest::gtest GTest::gtest_main)
- endif(debug)
-
-
- # Extract test-specific information from comments of tests
- file(READ ${testSource} fileContents)
- string(REGEX MATCH "Exit code: ([0-9]+)" _ ${fileContents})
- set(retCode ${CMAKE_MATCH_1})
- string(REGEX MATCH "pre P >= ([0-9]+)" _ ${fileContents})
- set(minProcs ${CMAKE_MATCH_1})
- string(REGEX MATCH "pre P <= ([0-9]+)" _ ${fileContents})
- set(maxProcs ${CMAKE_MATCH_1})
- string(REGEX MATCH "-probe ([0-9]+.[0-9]+)" _ ${fileContents})
- set(lpfProbeSecs ${CMAKE_MATCH_1})
-
- target_link_exe_with_core(${testName} ${ENGINE})
-
-
- if ("${minProcs}" STREQUAL "")
- set(minProcs "1")
- endif()
- if ("${maxProcs}" STREQUAL "")
- set(maxProcs "5")
- endif()
- if ("${lpfProbeSecs}" STREQUAL "")
- set(lpfProbeSecs "0.0")
- endif()
- if ("${retCode}" STREQUAL "")
- set(retCode "0")
- endif()
-
- # Most recent approach to Gtests, recommended!
- set_property(TARGET ${testName} PROPERTY TEST_LAUNCHER ${MY_TEST_LAUNCHER};-e;${ENGINE};-L;${CMAKE_BINARY_DIR}/lpfrun_build;-p;${minProcs};-P;${maxProcs};-t;${lpfProbeSecs};-R;${retCode})
- gtest_discover_tests(${testName}
- TEST_PREFIX ${ENGINE}_
- EXTRA_ARGS --gtest_output=xml:${test_output}/${ENGINE}_${testName}
- DISCOVERY_MODE POST_BUILD
- DISCOVERY_TIMEOUT 15
- )
+ if ("{$ENGINE}" STREQUAL "")
+ message(FATAL_ERROR "engine cannot be empty, ever!")
+ endif()
+ add_executable(${testName} ${testSource} ${ARGN})
+ target_compile_definitions(${testName} PUBLIC LPF_CORE_IMPL_ID=${ENGINE})
+ target_compile_definitions(${testName} PUBLIC LPF_CORE_MPI_USES_${ENGINE})
+ if (debug)
+ target_include_directories( ${testName} BEFORE PRIVATE ${CMAKE_SOURCE_DIR}/include/debug )
+ target_link_libraries(${testName} lpf_debug lpf_hl_debug GTest::gtest GTest::gtest_main)
+ else(debug)
+ target_link_libraries(${testName} GTest::gtest GTest::gtest_main)
+ endif(debug)
+
+
+ # Extract test-specific information from comments of tests
+ file(READ ${testSource} fileContents)
+ string(REGEX MATCH "Exit code: ([0-9]+)" _ ${fileContents})
+ set(retCode ${CMAKE_MATCH_1})
+ string(REGEX MATCH "pre P >= ([0-9]+)" _ ${fileContents})
+ set(minProcs ${CMAKE_MATCH_1})
+ string(REGEX MATCH "pre P <= ([0-9]+)" _ ${fileContents})
+ set(maxProcs ${CMAKE_MATCH_1})
+ string(REGEX MATCH "-probe ([0-9]+.[0-9]+)" _ ${fileContents})
+ set(lpfProbeSecs ${CMAKE_MATCH_1})
+
+ target_link_exe_with_core(${testName} ${ENGINE})
+
+ # The "\pre P <= max" comment in a test indicates the desired number of
+ # maximum LPF processes. If the test does not define a desired number of
+ # maximum LPF processes, it will be set to 5.
+ #
+ # The "\pre P >= min" comment in a test indicates the desired number of
+ # minimum LPF processes. If the test does not define a desired minimum
+ # number of LPF processes, it will be set to 1.
+ #
+ # Let 'processorCount' be the detected number of processors by the system.
+ # If this number is smaller than the desider minimum and/or maximum number
+ # of processes, it overwrites these
+ #
+ # Most tests only define a mininum number of desired processes, such as
+ # "\pre P >= 1". In those cases, the test will execute for the range 1,..,5
+ # (including)
+
+ if ("${minProcs}" STREQUAL "")
+ set(minProcs "1")
+ endif()
+ if ("${maxProcs}" STREQUAL "")
+ set(maxProcs "5")
+ endif()
+ # cap min with processorCount, if needed
+ if ("${minProcs}" GREATER "${processorCount}")
+ set(minProcs ${processorCount})
+ endif()
+ # cap max with processorCount, if needed
+ if ("${maxProcs}" GREATER "${processorCount}")
+ set(maxProcs ${processorCount})
+ endif()
+ if ("${lpfProbeSecs}" STREQUAL "")
+ set(lpfProbeSecs "0.0")
+ endif()
+ if ("${retCode}" STREQUAL "")
+ set(retCode "0")
+ endif()
+ # Most recent approach to Gtests, recommended!
+ set_property(TARGET ${testName} PROPERTY TEST_LAUNCHER ${MY_TEST_LAUNCHER};--engine;${ENGINE};--parallel_launcher;${CMAKE_BINARY_DIR}/lpfrun_build;--min_process_count;${minProcs};--max_process_count;${maxProcs};--lpf_probe_timer;${lpfProbeSecs};--expected_return_code;${retCode})
+ gtest_discover_tests(${testName}
+ TEST_PREFIX ${ENGINE}_
+ EXTRA_ARGS --gtest_output=xml:${test_output}/${ENGINE}_${testName}
+ DISCOVERY_MODE POST_BUILD
+ DISCOVERY_TIMEOUT 15
+ )
endfunction(add_gtest)
@@ -436,10 +507,11 @@ else(LPF_ENABLE_TESTS)
endif(LPF_ENABLE_TESTS)
+# Main LPF library includes and sources
include_directories(include)
include_directories(src/common)
-
add_subdirectory(src)
+
# Apps
add_subdirectory(src/utils)
diff --git a/README b/README
index 626173c7..26b0300b 100644
--- a/README
+++ b/README
@@ -38,6 +38,10 @@ Optional MPI engine requires
Optional for thread pinning by Pthreads and hybrid engines
- hwloc > 1.11
+Optional tests requires
+ - GNU C++ compiler (C++17 compatible),
+ - Python 3.
+
Optional (see --enable-doc) documentation requires
- doxygen > 1.5.6,
- graphviz,
diff --git a/bootstrap.sh b/bootstrap.sh
index 60c1ec26..14628772 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -192,7 +192,7 @@ EOF
--with-mpiexec=*)
mpiexec="${arg#--with-mpiexec=}"
- mpi_cmake_flags="${mpi_cmake_flags} -DMPIEXEC=$mpiexec"
+ mpi_cmake_flags="${mpi_cmake_flags} -DMPIEXEC=$mpiexec -DMPIEXEC_EXECUTABLE=$mpiexec"
shift;
;;
@@ -288,8 +288,8 @@ ${CMAKE_EXE} -Wno-dev \
-DLPF_HWLOC="${hwloc}" \
$hwloc_found_flag \
$mpi_cmake_flags \
- "$extra_flags" \
- "$perf_flags" \
+ ${extra_flags+"$extra_flags"} \
+ ${perf_flags+"$perf_flags"} \
"$@" $srcdir \
|| { echo FAIL "Failed to configure LPF; Please check your chosen configuration"; exit 1; }
diff --git a/cmake/mpi.cmake b/cmake/mpi.cmake
index bd7ca9a5..f8d55851 100644
--- a/cmake/mpi.cmake
+++ b/cmake/mpi.cmake
@@ -170,8 +170,17 @@ try_run( IBVERBS_INIT_RUNS IBVERBS_INIT_COMPILES
endif()
set(ENABLE_IBVERBS FALSE)
-if (LIB_IBVERBS AND NOT IBVERBS_INIT_RUNS STREQUAL "FAILED_TO_RUN")
- set(ENABLE_IBVERBS TRUE)
+if (LPF_ENABLE_TESTS)
+ # The Google Test integration requires that tests successfully compiled are
+ # also runnable
+ if (LIB_IBVERBS AND NOT IBVERBS_INIT_RUNS STREQUAL "FAILED_TO_RUN")
+ set(ENABLE_IBVERBS TRUE)
+ endif()
+else()
+ # Without the aforementioned Google Test requirement, we can safely build
+ # it and allow the user to deploy the built binaries on IB-enabled nodes.
+ if (LIB_IBVERBS)
+ set(ENABLE_IBVERBS TRUE)
+ endif()
endif()
-
diff --git a/doc/lpf_core.cfg.in b/doc/lpf_core.cfg.in
index 0a8de71c..bfb940b2 100644
--- a/doc/lpf_core.cfg.in
+++ b/doc/lpf_core.cfg.in
@@ -742,8 +742,8 @@ INPUT = @PROJECT_SOURCE_DIR@/include/lpf/core.h \
@PROJECT_SOURCE_DIR@/include/bsp/bsp.h \
@PROJECT_SOURCE_DIR@/include/lpf/hybrid.h \
@PROJECT_SOURCE_DIR@/include/lpf/mpirpc-client.h \
- @PROJECT_SOURCE_DIR@/include/lpf/rpc-client.h
-
+ @PROJECT_SOURCE_DIR@/include/lpf/rpc-client.h \
+ @PROJECT_SOURCE_DIR@/include/lpf/abort.h
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/include/lpf/abort.h b/include/lpf/abort.h
new file mode 100644
index 00000000..383a6ab8
--- /dev/null
+++ b/include/lpf/abort.h
@@ -0,0 +1,152 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LPFLIB_ABORT_H
+#define LPFLIB_ABORT_H
+
+#include "lpf/static_dispatch.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \addtogroup LPF_EXTENSIONS LPF API extensions
+ * @{
+ *
+ * \defgroup LPF_ABORT Functionality for aborting LPF applications
+ *
+ * If #LPF_HAS_ABORT has a nonzero value, then a call to #lpf_abort from any
+ * process in a distributed application, will abort the entire application.
+ *
+ * \note As with all LPF extensions, it is \em not mandatory for all LPF
+ * implementations to support this one.
+ *
+ * If #LPF_HAS_ABORT has a zero value, then a call to #lpf_abort shall have no
+ * other effect than it returning #LPF_SUCCESS.
+ *
+ * Therefore,
+ * - LPF implementations that cannot support an abort functionality may still
+ * provide a valid, albeit trivial, implementation of this extension.
+ * - LPF applications that aim to rely on #lpf_abort should first ensure that
+ * #LPF_HAS_ABORT is nonzero.
+ *
+ * \warning Portable LPF implementations best not rely on #lpf_abort at all.
+ * Although sometimes unavoidable, the recommendation is to avoid the
+ * use of this extension as best as possible.
+ *
+ * \note One case where #lpf_abort is absolutely required is for \em testing an
+ * LPF debug layer. Such a layer should detect erroneous usage, report it,
+ * but then typically cannot continue execution. In this case, relying on
+ * the standard abort or exit functionalities to terminate the process the
+ * error was detected at, typically results in implementation-specific
+ * (i.e., undefined) behaviour with regards to how the application at
+ * large terminates. This means that a test-suite for such a debug layer
+ * cannot reliably detect whether a distributed application has terminated
+ * for the expected reasons. In this case, #lpf_abort provides a reliable
+ * mechanism that such a test requires.
+ *
+ * @{
+ */
+
+/**
+ * Whether the active LPF engine supports aborting distributed applications.
+ *
+ * If the value of this field is zero (0), then a call to #lpf_abort will be a
+ * no-op and always return #LPF_SUCCESS.
+ */
+extern _LPFLIB_VAR const int LPF_HAS_ABORT ;
+
+/**
+ * A call to this function aborts the distributed application as soon as
+ * possible.
+ *
+ * \warning This function corresponds to a no-op if #LPF_HAS_ABORT equals zero.
+ *
+ * The below specification only applies when #LPF_HAS_ABORT contains a non-zero
+ * value; otherwise, a call to this function will have no other effect besides
+ * returning #LPF_SUCCESS.
+ *
+ * \note Rationale: the capability to abort relies on the software stack that
+ * underlies LPF, and in aiming to be a minimal API, LPF does not wish to
+ * force such a capabilities unto the underlying software or system.
+ *
+ * \note Applications that rely on #lpf_abort therefore should first check if
+ * the capability is supported.
+ *
+ * \note The recommended way to abort LPF applications that is fully supported
+ * by the core specification alone (i.e., excluding this #lpf_abort
+ * extension), is to simply exit the process that should be aborted.
+ * Compliant LPF implementations will then quit sibling processes at
+ * latest at a call to #lpf_sync that should handle communications
+ * with the exited process. Sibling processes may also exit early without
+ * involvement of LPF. In all cases, the parent call to #lpf_exec,
+ * #lpf_hook, or #lpf_rehook should return with #LPF_ERR_FATAL.
+ *
+ * \warning Therefore, whenever possible, code implemented on top of LPF ideally
+ * does not rely on #lpf_abort. Instead, error handling more reliably
+ * could be implemented on top of the above-described default LPF
+ * behaviour.
+ *
+ * The call to #lpf_abort differs from the stdlib abort; for example,
+ * implementations are not required to raise SIGABRT as part of a call to
+ * #lpf_abort. Instead, the requirements are that:
+ * 1. processes that call this function terminate during the call to
+ * #lpf_abort;
+ * 2. all other processes associated with the distributed application terminate
+ * at latest during a next call to #lpf_sync that should have handled
+ * communications with any aborted process;
+ * 3. regardless of whether LPF aborted sibling processes, whether they exited
+ * gracefully, or whether they also called #lpf_abort, the process(es) which
+ * made the parent call to #lpf_exec, #lpf_hook, or #lpf_rehook should
+ * either: a) terminate also, at latest when all (other) associated
+ * processes have terminated, (exclusive-)or b) return #LPF_ERR_FATAL.
+ * Which behaviour (a or b) will be followed is up to the implementation,
+ * and portable applications should account for both possibilities.
+ *
+ * \note In the above, \em other is between parenthesis since the processes
+ * executing the application may be fully disjoint from the process that
+ * spawned the application. In this case it is natural to elect that the
+ * spawning process returns #LPF_ERR_FATAL, though under this
+ * specification also that process may be aborted before the spawning
+ * call returns.
+ *
+ * \note If one of the associated processes deadlock (e.g. due to executing
+ * while(1){}), it shall remain undefined when the entire
+ * application aborts. Implementations shall make a best effort to do this
+ * as early as possible.
+ *
+ * \note Though implied by the above, we note explicitly that #lpf_abort is
+ * \em not a collective function; a single process calling #lpf_abort can
+ * terminate all associated processes.
+ *
+ * @returns #LPF_SUCCESS If and only if #LPF_HAS_ABORT equals zero.
+ *
+ * If #LPF_HAS_ABORT is nonzero, then this function shall not return.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_abort(lpf_t ctx);
+
+/**
+ * @}
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/lpf/core.h b/include/lpf/core.h
index c9a7f921..0d4434e2 100644
--- a/include/lpf/core.h
+++ b/include/lpf/core.h
@@ -126,6 +126,8 @@
* - \ref LPF_EXTENSIONS
* - \ref LPF_PTHREAD
* - \ref LPF_MPI
+ * - \ref LPF_HYBRID
+ * - \ref LPF_ABORT
* - \ref LPF_HL
* - \ref LPF_BSPLIB
* - \ref LPF_COLLECTIVES
@@ -988,7 +990,7 @@ typedef struct lpf_machine {
* both bounds are inclusive.
* \param[in] min_msg_size A byte size value that is larger or equal to 0.
* \param[in] attr A #lpf_sync_attr_t value. When in doubt, always
- * use #LPF_SYNC_DEFAULT
+ * use #LPF_SYNC_DEFAULT.
*
* \returns The guaranteed value for the message gap given an LPF SPMD
* section using \a p processes, for a superstep in which a user
@@ -2425,6 +2427,15 @@ lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t *rcvd_msgs);
extern _LPFLIB_API
lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t *sent_msgs, lpf_memslot_t slot);
+/**
+ * This function returns in @rcvd_msgs the total received message count.
+ * It is only implemented for the zero backend (on Infiniband)
+ * \param[in] ctx The LPF context
+ * \param[out] sent_msgs Sent message count
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t *sent_msgs);
+
/**
* This function blocks until all the scheduled messages via
* ibv_post_send are completed (via ibv_poll_cq). This includes
diff --git a/include/lpf/hybrid.h b/include/lpf/hybrid.h
index 00845f08..4c324adf 100644
--- a/include/lpf/hybrid.h
+++ b/include/lpf/hybrid.h
@@ -28,7 +28,7 @@ extern "C" {
*
* @{
*
- * \defgroup LPF_HYBRID Specific to Hybrid implementation
+ * \defgroup LPF_HYBRID Specific to the hybrid engine
*
* @{
*/
diff --git a/include/lpf/noc.h b/include/lpf/noc.h
new file mode 100644
index 00000000..8949f6e7
--- /dev/null
+++ b/include/lpf/noc.h
@@ -0,0 +1,472 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LPFLIB_NOC_H
+#define LPFLIB_NOC_H
+
+// import size_t data type for the implementation
+#ifndef DOXYGEN
+
+#ifdef __cplusplus
+#include
+#else
+#include
+#endif
+
+#include
+
+#endif // DOXYGEN
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \addtogroup LPF_EXTENSIONS LPF API extensions
+ *
+ * @{
+ *
+ * \defgroup LPF_NOC Extensions to LPF where it need not maintain consistency.
+ *
+ * This extension specifies facilities for (de-)registering memory slots,
+ * registering RDMA requests, and fencing RDMA requests. These extensions are,
+ * as far as possible, fully compatible with the core LPF definitions. These
+ * include LPF contexts (#lpf_t), processor count types (#lpf_pid_t), memory
+ * slot types (#lpf_memslot_t), and message attributes (#lpf_msg_attr_t).
+ *
+ * In this extension, LPF does not maintain consistency amongst processes that
+ * (de-)register memory slots while RDMA communication may occur. Maintaining
+ * the required consistency instead becomes the purview of the user. This
+ * extension specificies exactly what consistency properties the user must
+ * guarantee.
+ *
+ * \warning If LPF is considered a tool for the so-called hero
+ * programmer, then please note that this variant is even harder
+ * to program with.
+ *
+ * \note At present, no debug layer exists for this extension. It is unclear if
+ * such a debug layer is even possible (precisely because LPF in this
+ * extension does not maintain consistency, there is no way a debug layer
+ * could enforce it).
+ *
+ * @{
+ */
+
+
+/**
+ * The version of this no-conflict LPF specification. All implementations shall
+ * define this macro. The format is YYYYNN, where YYYY is the year the
+ * specification was released, and NN the number of the specifications released
+ * before this one in the same year.
+ */
+#define _LPF_NOC_VERSION 202400L
+
+/**
+ * Resizes the memory register for non-coherent RDMA.
+ *
+ * After a successful call to this function, the local process has enough
+ * resources to register \a max_regs memory regions in a non-coherent way.
+ *
+ * Each registration via lpf_noc_register() counts as one. Such registrations
+ * remain taking up capacity in the register until they are released via a call
+ * to lpf_noc_deregister(), which lowers the count of used memory registerations
+ * by one.
+ *
+ * There are no runtime out-of-bounds checks prescribed for lpf_noc_register()--
+ * this would also be too costly as error checking would require communication.
+ *
+ * If memory allocation were successful, the return value is #LPF_SUCCESS and
+ * the local process may assume the new buffer size \a max_regs.
+ *
+ * In the case of insufficient local memory the return value will be
+ * #LPF_ERR_OUT_OF_MEMORY. In that case, it is as if the call never happened and
+ * the user may retry the call locally after freeing up unused resources. Should
+ * retrying not lead to a successful call, the programmer may opt to broadcast
+ * the error (using existing slots) or to give up by returning from the spmd
+ * section.
+ *
+ * \note The current maximum cannot be retrieved from the runtime. Instead, the
+ * programmer must track this information herself. To provide
+ * encapsulation, see lpf_rehook().
+ *
+ * \note When the given memory register capacity is smaller than the current
+ * capacity, the runtime is allowed but not required to release the
+ * allocated memory. Such a call shall always be successful and return
+ * #LPF_SUCCESS.
+ *
+ * \note This means that an implementation that allows shrinking the given
+ * capacity must also ensure the old buffer remains intact in case there
+ * is not enough memory to allocate a smaller one.
+ *
+ * \note The last invocation of lpf_noc_resize_memory_register() determines the
+ * maximum number of memory registrations using lpf_noc_register() that
+ * can be maintained concurrently.
+ *
+ * \par Thread safety
+ * This function is safe to be called from different LPF processes only. Any
+ * further thread safety may be guaranteed by the implementation, but is not
+ * specified. Similar conditions hold for all LPF primitives that take an
+ * argument of type #lpf_t; see #lpf_t for more information.
+ *
+ * \param[in,out] ctx The runtime state as provided by lpf_exec().
+ * \param[in] max_regs The requested maximum number of memory regions that can
+ * be registered. This value must be the same on all
+ * processes.
+ *
+ * \returns #LPF_SUCCESS
+ * When this process successfully acquires the resources.
+ *
+ * \returns #LPF_ERR_OUT_OF_MEMORY
+ * When there was not enough memory left on the heap. In this case
+ * the effect is the same as when this call did not occur at all.
+ *
+ * \par BSP costs
+ * None
+ *
+ * See also \ref BSPCOSTS.
+ *
+ * \par Runtime costs
+ * \f$ \Theta( \mathit{max\_regs} ) \f$.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs );
+
+/**
+ * Registers a local memory area, preparing its use for intra-process
+ * communication.
+ *
+ * The registration process is necessary to enable Remote Direct Memory Access
+ * (RDMA) primitives, such as lpf_get() and lpf_put().
+ *
+ * This is \em not a collective function. For #lpf_get and #lpf_put, the memory
+ * slot returned by this function is equivalent to a memory slot returned by
+ * #lpf_register_local; the \a memslot returned by a successful call to this
+ * function (hence) is immediately valid. A successful call (hence) immediately
+ * consumes one memory slot capacity; see also #lpf_resize_memory_register on
+ * how to ensure sufficient capacity.
+ *
+ * Different from a memory slot returned by #lpf_register_local, a memory slot
+ * returned by a successful call to this function may serve as either a local
+ * or remote memory slot for #lpf_noc_put and #lpf_noc_get.
+ *
+ * Use of the returned memory slot to indicate a remote memory area may only
+ * occur by copying the returned memory slot to another LPF process. This may
+ * be done using the standard #lpf_put and #lpf_get methods or by using
+ * auxiliary communication mechanisms. The memory slot thus communicated only
+ * refers to a valid memory area on the process it originated from; any other
+ * use leads to undefined behaviour.
+ *
+ * \note Note that the ability to copy memory slots to act as identifiers of
+ * remote areas exploits the LPF core specification that instances of
+ * the #lpf_memslot_t type are, indeed, byte-copyable.
+ *
+ * A memory slot returned by a successful call to this function may be
+ * destroyed via a call to the standard #lpf_deregister. The deregistration
+ * takes effect immediately. No communication using the deregistered slot
+ * should occur during that superstep, or otherwise undefined behaviour occurs.
+ *
+ * Only the process that created the returned memory slot can destroy it; other
+ * LPF processes than the one which created it that attempt to destroy the
+ * returned memory slot, invoke undefined behaviour.
+ *
+ * Other than the above specified differences, the arguments to this function
+ * are the same as for #lpf_register_local:
+ *
+ * \param[in,out] ctx The runtime state as provided by lpf_exec().
+ * \param[in] pointer The pointer to the memory area to register.
+ * \param[in] size The size of the memory area to register in bytes.
+ * \param[out] memslot Where to store the memory slot identifier.
+ *
+ * \note Registering a slot with zero \a size is valid. The resulting memory
+ * slot cannot be written to nor read from by remote LPF processes.
+ *
+ * \note In particular, passing \c NULL as \a pointer and \c 0 for \a size is
+ * valid.
+ *
+ * \returns #LPF_SUCCESS
+ * Successfully registered the memory region and successfully
+ * assigned a memory slot identifier.
+ *
+ * \note One registration consumes one memory slot from the pool of locally
+ * available memory slots, which must have been preallocated by
+ * lpf_resize_memory_register() or recycled by lpf_deregister(). Always
+ * use lpf_resize_memory_register() at the start of the SPMD function
+ * that is executed by lpf_exec(), since lpf_exec() itself does not
+ * preallocate slots.
+ *
+ * \note It is illegal to request more memory slots than have previously been
+ * registered with lpf_resize_memory_register(). There is no runtime
+ * check for this error, because a safe way out cannot be guaranteed
+ * without significant parallel error checking overhead.
+ *
+ * \par Thread safety
+ * This function is safe to be called from different LPF processes only. Any
+ * further thread safety may be guaranteed by the implementation, but is not
+ * specified. Similar conditions hold for all LPF primitives that take an
+ * argument of type #lpf_t; see #lpf_t for more information.
+ *
+ * \par BSP costs
+ *
+ * None.
+ *
+ * \par Runtime costs
+ *
+ * \f$ \mathcal{O}( \texttt{size} ) \f$.
+ *
+ * \note This asymptotic bound may be attained for implementations that require
+ * linear-time processing on the registered memory area, such as to effect
+ * memory pinning. If this is not required, a good implementation will
+ * require only \f$ \Theta(1) \f$ time.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_noc_register(
+ lpf_t ctx,
+ void * pointer,
+ size_t size,
+ lpf_memslot_t * memslot
+);
+
+/**
+ * Deregisters a memory area previously registered using lpf_noc_register().
+ *
+ * After a successful deregistration, the slot is returned to the pool of free
+ * memory slots. The total number of memory slots may be set via a call to
+ * lpf_noc_resize_memory_register().
+ *
+ * Deregistration takes effect immediately. A call to this function is not
+ * collective, and the other of deregistration does not need to match the order
+ * of registration. Any local or remote communication using the given \a memslot
+ * in the current superstep invokes undefined behaviour.
+ *
+ * \par Thread safety
+ * This function is safe to be called from different LPF processes only. Any
+ * further thread safety may be guaranteed by the implementation, but is not
+ * specified. Similar conditions hold for all LPF primitives that take an
+ * argument of type #lpf_t; see #lpf_t for more information.
+ *
+ * \param[in,out] ctx The runtime state as provided by lpf_exec().
+ * \param[in] memslot The memory slot identifier to de-register.
+ *
+ * \returns #LPF_SUCCESS
+ * Successfully deregistered the memory region.
+ *
+ * \par BSP costs
+ * None.
+ *
+ * \par Runtime costs
+ * \f$ \mathcal{O}(n) \f$, where \f$ n \f$ is the size of the memory region
+ * corresponding to \a memslot.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_noc_deregister(
+ lpf_t ctx,
+ lpf_memslot_t memslot
+);
+
+/**
+ * Copies contents of local memory into the memory of remote processes.
+ *
+ * This operation is guaranteed to be completed after a call to the next
+ * lpf_sync() exits.
+ *
+ * Until that time it occupies one entry in the operations queue.
+ *
+ * Concurrent reads or writes from or to the same memory area are
+ * allowed in the same way they are for the core primitive #lpf_put.
+ *
+ * This primitive differs from #lpf_put in that the \a dst_slot may be the
+ * result of a successful call to #lpf_noc_register, while \a src_slot \em must
+ * be the results of such a successful call. In both cases, the slot need
+ * \em not have been registered before the last call to #lpf_sync.
+ *
+ * \par Thread safety
+ * This function is safe to be called from different LPF processes only. Any
+ * further thread safety may be guaranteed by the implementation, but is not
+ * specified. Similar conditions hold for all LPF primitives that take an
+ * argument of type #lpf_t; see #lpf_t for more information.
+ *
+ * \param[in,out] ctx The runtime state as provided by lpf_exec()
+ * \param[in] src_slot The memory slot of the local source memory area
+ * registered using lpf_register_local(),
+ * lpf_register_global(), or lpf_noc_register()
+ * \param[in] src_offset The offset of reading out the source memory area,
+ * w.r.t. the base location of the registered area
+ * expressed in bytes.
+ * \param[in] dst_pid The process ID of the destination process.
+ * \param[in] dst_slot The memory slot of the destination memory area at
+ * \a pid, registered using lpf_register_global() or
+ * lpf_noc_register().
+ * \param[in] dst_offset The offset of writing to the destination memory area
+ * w.r.t. the base location of the registered area
+ * expressed in bytes.
+ * \param[in] size The number of bytes to copy from the source memory area
+ * to the destination memory area.
+ * \param[in] attr
+ * \parblock
+ * In case an \a attr not equal to #LPF_MSG_DEFAULT is provided, the
+ * the message created by this function may have modified semantics
+ * that may be used to extend this API. Examples include:
+ *
+ * -# delaying the superstep deadline of delivery, and/or
+ * -# DRMA with message combining semantics.
+ *
+ * These attributes are stored after a call to this function has
+ * completed and may be modified immediately after without affecting
+ * any messages already scheduled.
+ * \endparblock
+ *
+ * \note See #lpf_put for notes regarding #lpf_msg_attr_t.
+ *
+ * \returns #LPF_SUCCESS
+ * When the communication request was recorded successfully.
+ *
+ * \par BSP costs
+ * This function will increase
+ * \f$ t_{c}^{(s)} \f$
+ * and
+ * \f$ r_{c}^{(\mathit{pid})} \f$
+ * by \a size, where c is the current superstep number and s is this process ID
+ * (as provided by #lpf_exec)). See \ref BSPCOSTS on how this affects real-time
+ * communication costs.
+ *
+ * \par Runtime costs
+ * See \ref BSPCOSTS.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_noc_put(
+ lpf_t ctx,
+ lpf_memslot_t src_slot,
+ size_t src_offset,
+ lpf_pid_t dst_pid,
+ lpf_memslot_t dst_slot,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+);
+
+/**
+ * Copies contents from remote memory to local memory.
+ *
+ * This operation completes after one call to lpf_sync().
+ *
+ * Until that time it occupies one entry in the operations queue.
+ *
+ * Concurrent reads or writes from or to the same memory area are allowed in the
+ * same way it is for #lpf_get.
+ *
+ * This primitive differs from #lpf_get in that the \a src_slot may be the
+ * result of a successful call to #lpf_noc_register, while \a dst_slot \em must
+ * be the results of such a successful call. In both cases, the slot need
+ * \em not have been registered before the last call to #lpf_sync.
+ *
+ * \par Thread safety
+ * This function is safe to be called from different LPF processes only. Any
+ * further thread safety may be guaranteed by the implementation, but is not
+ * specified. Similar conditions hold for all LPF primitives that take an
+ * argument of type #lpf_t; see #lpf_t for more information.
+ *
+ * \param[in,out] ctx The runtime state as provided by lpf_exec().
+ * \param[in] src_pid The process ID of the source process.
+ * \param[in] src_slot The memory slot of the source memory area at \a pid, as
+ * globally registered with lpf_register_global() or
+ * lpf_noc_register().
+ * \param[in] src_offset The offset of reading out the source memory area,
+ * w.r.t. the base location of the registered area
+ * expressed in bytes.
+ * \param[in] dst_slot The memory slot of the local destination memory area
+ * registered using lpf_register_local(),
+ * lpf_register_global(), or lpf_noc_register().
+ * \param[in] dst_offset The offset of writing to the destination memory area
+ * w.r.t. the base location of the registered area
+ * expressed in bytes.
+ * \param[in] size The number of bytes to copy from the source
+ * remote memory location.
+ * \param[in] attr
+ * \parblock
+ * In case an \a attr not equal to #LPF_MSG_DEFAULT is provided, the
+ * the message created by this function may have modified semantics
+ * that may be used to extend this API. Examples include:
+ *
+ * -# delaying the superstep deadline of delivery, and/or
+ * -# DRMA with message combining semantics.
+ *
+ * These attributes are stored after a call to this function has
+ * completed and may be modified immediately after without affecting
+ * any messages already scheduled.
+ * \endparblock
+ *
+ * \note See #lpf_get for notes on the use of #lpf_msg_attr_t.
+ *
+ * \returns #LPF_SUCCESS
+ * When the communication request was recorded successfully.
+ *
+ * \par BSP costs
+ * This function will increase
+ * \f$ r_{c}^{(s)} \f$
+ * and
+ * \f$ t_{c}^{(\mathit{pid})} \f$
+ * by \a size, where c is the current superstep number and s is this process ID
+ * (as provided via lpf_exec(). See \ref BSPCOSTS on how this affects real-time
+ * communication costs.
+ *
+ * \par Runtime costs
+ * See \ref BSPCOSTS.
+ */
+extern _LPFLIB_API
+lpf_err_t lpf_noc_get(
+ lpf_t ctx,
+ lpf_pid_t src_pid,
+ lpf_memslot_t src_slot,
+ size_t src_offset,
+ lpf_memslot_t dst_slot,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+);
+
+extern _LPFLIB_API
+lpf_err_t lpf_noc_serialize_slot(
+ lpf_t ctx,
+ lpf_memslot_t slot,
+ char ** buff,
+ size_t * buff_size
+);
+/*
+ * lpf_deserialize_slot may only be called on a slot
+ * already registered via lpf_noc_register.
+ * This call sets the memory registration attributes from
+ * the byte array buff with byte size buff_size.
+ * This array must have been created via a call to
+ * @lpf_serialize_slot
+ */
+extern _LPFLIB_API
+ lpf_err_t lpf_noc_deserialize_slot(
+ lpf_t ctx,
+ char * buff,
+ lpf_memslot_t slot
+);
+/**
+ * @}
+ *
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/lpf/pthread.h b/include/lpf/pthread.h
index ba68f3f8..454eaf0e 100644
--- a/include/lpf/pthread.h
+++ b/include/lpf/pthread.h
@@ -28,7 +28,7 @@ extern "C" {
*
* @{
*
- * \defgroup LPF_PTHREAD Specific to Pthreads
+ * \defgroup LPF_PTHREAD Specific to the Pthreads engine
*
* @{
*/
diff --git a/include/lpf/static_dispatch.h b/include/lpf/static_dispatch.h
index 8816f9e9..71d65526 100644
--- a/include/lpf/static_dispatch.h
+++ b/include/lpf/static_dispatch.h
@@ -45,6 +45,7 @@
#undef lpf_register_local
#undef lpf_get_rcvd_msg_count
#undef lpf_get_rcvd_msg_count_per_slot
+#undef lpf_get_sent_msg_count
#undef lpf_get_sent_msg_count_per_slot
#undef lpf_register_global
#undef lpf_flush_sent
@@ -85,6 +86,7 @@
#undef LPF_NONE
#undef LPF_INIT_NONE
#undef LPF_NO_ARGS
+#undef LPF_HAS_ABORT
#ifdef LPF_FUNC
@@ -96,6 +98,7 @@
#define lpf_register_local LPF_FUNC(register_local)
#define lpf_get_rcvd_msg_count LPF_FUNC(get_rcvd_msg_count)
#define lpf_get_rcvd_msg_count_per_slot LPF_FUNC(get_rcvd_msg_count_per_slot)
+#define lpf_get_sent_msg_count LPF_FUNC(get_sent_msg_count)
#define lpf_get_sent_msg_count_per_slot LPF_FUNC(get_sent_msg_count_per_slot)
#define lpf_flush_sent LPF_FUNC(flush_sent)
#define lpf_flush_received LPF_FUNC(flush_received)
@@ -136,6 +139,7 @@
#define LPF_NONE LPF_CONST(NONE)
#define LPF_INIT_NONE LPF_CONST(INIT_NONE)
#define LPF_NO_ARGS LPF_CONST(NO_ARGS)
+#define LPF_HAS_ABORT LPF_CONST(HAS_ABORT)
#endif
diff --git a/lpfcc.in b/lpfcc.in
index b1a89659..b58da83a 100644
--- a/lpfcc.in
+++ b/lpfcc.in
@@ -187,6 +187,32 @@ do
shift
;;
+ # The below two special cases are to ensure good integration with CMake.
+ # Note that the arguments that follow -MT and -MQ are object files, which
+ # otherwise would be appended to the objects list. In case of manual
+ # usage of lpfcc, therefore, the use of these flags should come after --,
+ # however, it is unclear how to pass that to CMake and we choose this
+ # solution instead (nor would that be desired-- ideally, lpfcc can act as
+ # a "regular" CC from the CMake perspective)
+
+ -MT)
+ other_args[$arg_number]="-MT"
+ arg_number=$((arg_number + 1))
+ shift
+ other_args[$arg_number]="$arg"
+ arg_number=$((arg_number + 1))
+ shift
+ ;;
+
+ -MQ)
+ other_args[$arg_number]="-MQ"
+ arg_number=$((arg_number + 1))
+ shift
+ other_args[$arg_number]="$arg"
+ arg_number=$((arg_number + 1))
+ shift
+ ;;
+
*) case $state in
engine)
diff --git a/post-install/cmake-module-test/src/CMakeLists.txt b/post-install/cmake-module-test/src/CMakeLists.txt
index fe1ae2a8..eeef8252 100644
--- a/post-install/cmake-module-test/src/CMakeLists.txt
+++ b/post-install/cmake-module-test/src/CMakeLists.txt
@@ -15,7 +15,7 @@
# limitations under the License.
#
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.10)
project(findlpf_test)
find_package(lpf REQUIRED CONFIG)
diff --git a/post-install/func_lpf_hook_subset.mpimsg.cpp b/post-install/func_lpf_hook_subset.mpimsg.cpp
new file mode 100644
index 00000000..6b7d3a5c
--- /dev/null
+++ b/post-install/func_lpf_hook_subset.mpimsg.cpp
@@ -0,0 +1,67 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+
+
+const int LPF_MPI_AUTO_INITIALIZE=0;
+
+void test_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args)
+{
+ (void) ctx;
+ (void) pid;
+ (void) nprocs;
+ (void) args;
+ return;
+}
+
+void subset_func(MPI_Comm comm)
+{
+ MPI_Barrier(comm);
+
+ lpf_init_t init;
+ lpf_err_t rc = lpf_mpi_initialize_with_mpicomm(comm, &init);
+
+ rc = lpf_hook(init, test_spmd, LPF_NO_ARGS);
+}
+
+int main(int argc, char **argv)
+{
+ MPI_Init(&argc, &argv);
+
+ int s;
+ MPI_Comm_rank(MPI_COMM_WORLD, &s);
+
+ int subset = s < 2; // Processes are divided into 2 subsets {0,1} and {2,...,p-1}
+
+ MPI_Comm subset_comm;
+ MPI_Comm_split(MPI_COMM_WORLD, subset, s, &subset_comm);
+
+// only the first subset enters that function
+ if (subset)
+ {
+ subset_func(subset_comm);
+ }
+
+ MPI_Barrier(MPI_COMM_WORLD); // Paranoid barrier
+
+ MPI_Finalize();
+
+}
diff --git a/post-install/post-install-test.cmake.in b/post-install/post-install-test.cmake.in
index 65c9ef9f..75c5de13 100644
--- a/post-install/post-install-test.cmake.in
+++ b/post-install/post-install-test.cmake.in
@@ -268,12 +268,12 @@ if (MPI_FOUND)
endif()
- message("Compiling a simple LPF program with mpimsg engine")
+ message("Compiling a simple MPI LPF program with mpimsg engine")
# Compile this to check whether mpi.h can be found
execute_process(
- COMMAND @bindir@/lpfcc -engine mpimsg -I@common@
- @testdir@/func_lpf_hook_subset.mpimsg.c
- -o lpfhook_subset_mpimsg_cc
+ COMMAND @bindir@/lpfcxx -engine mpimsg -I@common@
+ @srcdir@/func_lpf_hook_subset.mpimsg.cpp -c
+ -o lpfhook_subset_mpimsg_cc.o
WORKING_DIRECTORY @builddir@
RESULT_VARIABLE status
)
@@ -353,6 +353,9 @@ endif()
###### CMake integration using generated CMake module file ############
foreach(engine @ENGINES@)
+ if ("${engine}" STREQUAL "zero")
+ continue()
+ endif()
message("Testing generated CMake module files for engine ${engine}")
set(test_dir @builddir@/cmake-module-test-${engine})
diff --git a/post-install/test-lpf-nprocs.c b/post-install/test-lpf-nprocs.c
index cf274b3f..554b5775 100644
--- a/post-install/test-lpf-nprocs.c
+++ b/post-install/test-lpf-nprocs.c
@@ -53,6 +53,8 @@ void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
lpf_memslot_t mem_slot = LPF_INVALID_MEMSLOT;
lpf_register_global( lpf, mem, nprocs, &mem_slot );
+ lpf_sync(lpf, LPF_SYNC_DEFAULT);
+
if (pid != 0)
lpf_get( lpf, 0, params_slot, 0, params_slot, 0, sizeof(params), LPF_MSG_DEFAULT );
diff --git a/src/MPI/CMakeLists.txt b/src/MPI/CMakeLists.txt
index 9633d1d5..636b243c 100644
--- a/src/MPI/CMakeLists.txt
+++ b/src/MPI/CMakeLists.txt
@@ -35,27 +35,25 @@ if (MPI_FOUND)
set_target_properties( lpf_proxy_dummy PROPERTIES LINK_FLAGS
"${MPI_C_LINK_FLAGS}" )
target_include_directories( lpf_proxy_dummy PRIVATE ${MPI_C_INCLUDE_PATH})
- target_compile_flags(lpf_proxy_dummy PRIVATE ${MPI_C_COMPILE_FLAGS})
install( TARGETS lpf_proxy_dummy RUNTIME DESTINATION ${INSTALL_HELPERS} )
-
set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME})
-# univ_ stands for universal interface => lpf_exec, lpf_put, etc...
-# spec_ stands for specific interface => lpf_mpimsg_release_exec, lpf_mpimsg_release_put, etc...
+ # univ_ stands for universal interface => lpf_exec, lpf_put, etc...
+ # spec_ stands for specific interface => lpf_mpimsg_release_exec, lpf_mpimsg_release_put, etc...
foreach (iface "univ_" "spec_" )
- foreach (LPF_IMPL_ID ${MPI_ENGINES})
- set(libname "lpf_core_${iface}${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}")
- set(comlib "lpf_common_${LPFLIB_CONFIG_NAME}")
-
- set(ibverbs_sources)
- if (LPF_IMPL_ID STREQUAL ibverbs)
+ foreach (LPF_IMPL_ID ${MPI_ENGINES})
+ set(libname "lpf_core_${iface}${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}")
+ set(comlib "lpf_common_${LPFLIB_CONFIG_NAME}")
+
+ set(ibverbs_sources)
+ if (LPF_IMPL_ID STREQUAL ibverbs)
set(ibverbs_sources ibverbs.cpp)
endif()
if (LPF_IMPL_ID STREQUAL zero)
- set(ibverbs_sources ibverbsZero.cpp)
+ set(ibverbs_sources ibverbsZero.cpp ibverbsNoc.cpp)
endif()
add_library(raw_${libname} OBJECT
@@ -71,15 +69,13 @@ if (MPI_FOUND)
spall2all.c
messagesort.cpp
spall2all.cpp
- init.cpp
+ init.cpp
${ibverbs_sources}
)
target_compile_flags(raw_${libname}
- PUBLIC ${MPI_C_COMPILE_FLAGS}
- INTERFACE "-fPIC"
- )
+ INTERFACE "-fPIC")
target_compile_definitions(raw_${libname}
PRIVATE "LPF_CORE_MPI_USES_${LPF_IMPL_ID}=1"
@@ -107,9 +103,7 @@ if (MPI_FOUND)
MACOSX_RPATH TRUE)
target_compile_flags(${libname}
- PUBLIC ${MPI_C_COMPILE_FLAGS}
- INTERFACE "-fPIC"
- )
+ INTERFACE "-fPIC")
if (iface STREQUAL "spec_")
target_compile_definitions(${libname}
@@ -175,31 +169,43 @@ if (MPI_FOUND)
${CMAKE_CURRENT_SOURCE_DIR}/dynamichook.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp)
- configure_file( dynamichook.t.sh.in dynamichook.t.sh @ONLY)
- set( dynamic_hook_t_sh "${CMAKE_CURRENT_BINARY_DIR}/dynamichook.t.sh")
- add_test(NAME dynamichook_1proc
- COMMAND bash ${dynamic_hook_t_sh} 1)
- set_tests_properties( dynamichook_1proc PROPERTIES TIMEOUT 30 )
- add_test(NAME dynamichook_2proc
- COMMAND bash ${dynamic_hook_t_sh} 2)
- set_tests_properties( dynamichook_2proc PROPERTIES TIMEOUT 30 )
- add_test(NAME dynamichook_3proc
- COMMAND bash ${dynamic_hook_t_sh} 3)
- set_tests_properties( dynamichook_3proc PROPERTIES TIMEOUT 30 )
- add_test(NAME dynamichook_10proc
- COMMAND bash ${dynamic_hook_t_sh} 10)
- set_tests_properties( dynamichook_10proc PROPERTIES TIMEOUT 30 )
+ configure_file( dynamichook.t.sh.in dynamichook.t.sh @ONLY)
+ set( dynamic_hook_t_sh "${CMAKE_CURRENT_BINARY_DIR}/dynamichook.t.sh")
+ add_test(NAME dynamichook_1proc
+ COMMAND bash ${dynamic_hook_t_sh} 1)
+ # We set all dynamichook tests to run in serial mode, without any other tests,
+ # since these tests occupy the same port and would block each other
+ set_tests_properties( dynamichook_1proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE)
+ add_test(NAME dynamichook_2proc
+ COMMAND bash ${dynamic_hook_t_sh} 2)
+ set_tests_properties( dynamichook_2proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE)
+ add_test(NAME dynamichook_3proc
+ COMMAND bash ${dynamic_hook_t_sh} 3)
+ set_tests_properties( dynamichook_3proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE)
+ add_test(NAME dynamichook_10proc
+ COMMAND bash ${dynamic_hook_t_sh} 10)
+ set_tests_properties( dynamichook_10proc PROPERTIES TIMEOUT 30 RUN_SERIAL TRUE)
endif()
# Other unit tests
if (ENABLE_IBVERBS AND LPF_ENABLE_TESTS)
add_gtest( ibverbs_test "ibverbs" ON ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.t.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp)
+ ibverbs.cpp mpilib.cpp)
add_gtest( zero_test "zero" ON ${CMAKE_CURRENT_SOURCE_DIR}/ibverbs.t.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ibverbsZero.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp)
+
+ # NOC test for HiCR
+ set(mode "")
+ set(LPF_IMPL_ID "zero")
+ set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME})
+ set(exeName "func_verbs_test_noc_register_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
+ add_gtest(${exeName} ${LPF_IMPL_ID} ON ${CMAKE_CURRENT_SOURCE_DIR}/func_verbs_test_noc_register.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/ibverbsZero.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/ibverbsNoc.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/mpilib.cpp)
+
endif()
foreach (engine ${MPI_ENGINES})
@@ -229,4 +235,3 @@ if (MPI_FOUND)
endif(MPI_FOUND)
-
diff --git a/src/MPI/core.cpp b/src/MPI/core.cpp
index 4340bd27..dc3f0a0f 100644
--- a/src/MPI/core.cpp
+++ b/src/MPI/core.cpp
@@ -15,8 +15,10 @@
* limitations under the License.
*/
+#include
#include
#include
+#include
#include
#include
@@ -36,6 +38,11 @@
#include
+
+// the value 2 in this implementation indicates support for lpf_abort in a way
+// that may deviate from the stdlib abort()
+const int LPF_HAS_ABORT = 2;
+
// Error codes.
// Note: Some code (e.g. in process::broadcastSymbol) depends on the
// fact that numbers are assigned in order of severity, where 0 means
@@ -331,6 +338,15 @@ lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t * rcvd_msgs)
return LPF_SUCCESS;
}
+lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t * sent_msgs)
+{
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted()) {
+ i->getSentMsgCount(sent_msgs);
+ }
+ return LPF_SUCCESS;
+}
+
lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_msgs, lpf_memslot_t slot)
{
lpf::Interface * i = realContext(ctx);
@@ -392,4 +408,106 @@ lpf_err_t lpf_abort( lpf_t ctx ) {
return LPF_SUCCESS;
}
+lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs )
+{
+ lpf::Interface * i = realContext(ctx);
+ if (i->isAborted())
+ return LPF_SUCCESS;
+
+ return i->nocResizeMemreg(max_regs);
+}
+
+lpf_err_t lpf_noc_register(
+ lpf_t ctx,
+ void * pointer,
+ size_t size,
+ lpf_memslot_t * memslot
+)
+{
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ *memslot = i->nocRegister(pointer, size);
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_deregister(
+ lpf_t ctx,
+ lpf_memslot_t memslot
+)
+{
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ i->nocDeregister(memslot);
+
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_put(
+ lpf_t ctx,
+ lpf_memslot_t src_slot,
+ size_t src_offset,
+ lpf_pid_t dst_pid,
+ lpf_memslot_t dst_slot,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+)
+{
+ (void) attr; // ignore parameter 'msg' since this implementation only
+ // implements core functionality
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ i->nocPut( src_slot, src_offset, dst_pid, dst_slot, dst_offset, size );
+
+ return LPF_SUCCESS;
+
+}
+
+lpf_err_t lpf_noc_get(
+ lpf_t ctx,
+ lpf_pid_t pid,
+ lpf_memslot_t src,
+ size_t src_offset,
+ lpf_memslot_t dst,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+)
+{
+ (void) attr; // ignore parameter 'msg' since this implementation only
+ // implements core functionality
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ i->nocGet( pid, src, src_offset, dst, dst_offset, size );
+
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_serialize_slot(
+ lpf_t ctx,
+ lpf_memslot_t slot,
+ char ** buff,
+ size_t * buff_size
+)
+{
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ return i->serializeSlot(slot, buff, buff_size);
+
+ return LPF_ERR_FATAL;
+}
+
+lpf_err_t lpf_noc_deserialize_slot(
+ lpf_t ctx,
+ char * buff,
+ lpf_memslot_t slot
+)
+{
+ lpf::Interface * i = realContext(ctx);
+ if (!i->isAborted())
+ return i->deserializeSlot( buff, slot);
+
+ return LPF_ERR_FATAL;
+
+}
diff --git a/src/MPI/func_verbs_test_noc_register.cpp b/src/MPI/func_verbs_test_noc_register.cpp
new file mode 100644
index 00000000..6e9ad17f
--- /dev/null
+++ b/src/MPI/func_verbs_test_noc_register.cpp
@@ -0,0 +1,86 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ibverbsNoc.hpp"
+#include "mpilib.hpp"
+#include
+#include "gtest/gtest.h"
+
+
+using namespace lpf::mpi;
+
+extern "C" const int LPF_MPI_AUTO_INITIALIZE=0;
+
+
+/**
+ * \test Testing NOC functionality
+ * \pre P >= 2
+ * \pre P <= 2
+ * \return Exit code: 0
+ */
+TEST( API, func_verbsAPI_zero_test_noc_ring )
+{
+
+ char buf1[30] = {'\0'};
+ char buf2[30] = {'\0'};
+
+ strcpy(buf1, "HELLO");
+
+ MPI_Init(NULL, NULL);
+ Lib::instance();
+ Comm * comm = new Comm();
+ *comm = Lib::instance().world();
+ int rank = comm->pid();
+ assert(comm->nprocs() > 0);
+ comm->barrier();
+ IBVerbsNoc * verbs = new IBVerbsNoc( *comm );
+
+ verbs->resizeMemreg(3);
+ comm->barrier();
+
+ verbs->resizeMesgq( 2 );
+ comm->barrier();
+
+ IBVerbs::SlotID b1 = verbs->regLocal( buf1, sizeof(buf1) );
+ IBVerbs::SlotID b2 = verbs->regNoc( buf2, sizeof(buf2) );
+
+ auto mr = verbs->getMR(b1, rank);
+ mr = verbs->getMR(b2, rank);
+ assert(mr._addr != nullptr);
+ char * buffer;
+ size_t bufSize = mr.serialize(&buffer);
+ std::string bufAsString(buffer);
+
+ int left = (comm->nprocs() + rank - 1) % comm->nprocs();
+ int right = (rank + 1) % comm->nprocs();
+ char rmtBuff[bufSize];
+ std::stringstream ss(buffer);
+
+ MPI_Sendrecv(buffer, bufSize, MPI_BYTE, left, 0, rmtBuff, bufSize, MPI_BYTE, right, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+ MemoryRegistration * newMr = MemoryRegistration::deserialize(rmtBuff);
+ verbs->setMR(b2, right, *newMr);
+ comm->barrier();
+ verbs->put( b1, 0, right, b2, 0, sizeof(buf1));
+ verbs->sync(true);
+ EXPECT_EQ(std::string(buf2), std::string(buf1));
+ verbs->dereg(b1);
+ verbs->dereg(b2);
+ delete verbs;
+ delete comm;
+ MPI_Finalize();
+}
diff --git a/src/MPI/ibverbs.cpp b/src/MPI/ibverbs.cpp
index 5dcdbfc8..77832aae 100644
--- a/src/MPI/ibverbs.cpp
+++ b/src/MPI/ibverbs.cpp
@@ -45,9 +45,20 @@ namespace {
}
}
+size_t MemoryRegistration :: serialize(char ** buf) {
+ (void) buf;
+ throw IBVerbs::Exception( "MemoryRegistration::serialize(char ** buf) not implemented for base IBVerbs class");
+}
+
+MemoryRegistration * MemoryRegistration :: deserialize(char * buf)
+{
+ (void) buf;
+ throw IBVerbs::Exception( "MemoryRegistration::deserialize(char * buf) not implemented for base IBVerbs class");
+}
IBVerbs :: IBVerbs( Communication & comm )
- : m_pid( comm.pid() )
+ : m_comm( comm )
+ , m_pid( comm.pid() )
, m_nprocs( comm.nprocs() )
, m_devName()
, m_ibPort( Config::instance().getIBPort() )
@@ -72,7 +83,6 @@ IBVerbs :: IBVerbs( Communication & comm )
, m_memreg()
, m_dummyMemReg()
, m_dummyBuffer()
- , m_comm( comm )
{
m_peerList.reserve( m_nprocs );
@@ -97,7 +107,6 @@ IBVerbs :: IBVerbs( Communication & comm )
throw Exception( "No Infiniband devices available" );
}
-
std::string wantDevName = Config::instance().getIBDeviceName();
LOG( 3, "Searching for device '"<< wantDevName << "'" );
struct ibv_device * dev = NULL;
@@ -463,8 +472,8 @@ void IBVerbs :: resizeMemreg( size_t size )
throw std::bad_alloc() ;
}
- MemoryRegistration null = { 0, 0, 0, 0 };
- MemorySlot dflt; dflt.glob.resize( m_nprocs, null );
+ MemoryRegistration newMR = { nullptr, 0, 0, 0, m_pid};
+ MemorySlot dflt; dflt.glob.resize( m_nprocs, newMR );
m_memreg.reserve( size, dflt );
}
@@ -507,11 +516,7 @@ IBVerbs :: SlotID IBVerbs :: regLocal( void * addr, size_t size )
throw Exception("Could not register memory area");
}
}
- MemoryRegistration local;
- local.addr = addr;
- local.size = size;
- local.lkey = size?slot.mr->lkey:0;
- local.rkey = size?slot.mr->rkey:0;
+ MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid);
SlotID id = m_memreg.addLocalReg( slot );
@@ -551,11 +556,7 @@ IBVerbs :: SlotID IBVerbs :: regGlobal( void * addr, size_t size )
// exchange memory registration info globally
ref.glob.resize(m_nprocs);
- MemoryRegistration local;
- local.addr = addr;
- local.size = size;
- local.lkey = size?slot.mr->lkey:0;
- local.rkey = size?slot.mr->rkey:0;
+ MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid);
LOG(4, "All-gathering memory register data" );
@@ -583,13 +584,13 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset,
struct ibv_send_wr sr; std::memset(&sr, 0, sizeof(sr));
const char * localAddr
- = static_cast(src.glob[m_pid].addr) + srcOffset;
+ = static_cast(src.glob[m_pid]._addr) + srcOffset;
const char * remoteAddr
- = static_cast(dst.glob[dstPid].addr) + dstOffset;
+ = static_cast(dst.glob[dstPid]._addr) + dstOffset;
sge.addr = reinterpret_cast( localAddr );
sge.length = std::min(size, m_maxMsgSize );
- sge.lkey = src.mr->lkey;
+ sge.lkey = src.mr->lkey;
m_sges.push_back( sge );
bool lastMsg = ! m_activePeers.contains( dstPid );
@@ -603,7 +604,7 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset,
sr.num_sge = 1;
sr.opcode = IBV_WR_RDMA_WRITE;
sr.wr.rdma.remote_addr = reinterpret_cast( remoteAddr );
- sr.wr.rdma.rkey = dst.glob[dstPid].rkey;
+ sr.wr.rdma.rkey = dst.glob[dstPid]._rkey;
m_srsHeads[ dstPid ] = m_srs.size();
m_srs.push_back( sr );
@@ -632,9 +633,9 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset,
struct ibv_send_wr sr; std::memset(&sr, 0, sizeof(sr));
const char * localAddr
- = static_cast(dst.glob[m_pid].addr) + dstOffset;
+ = static_cast(dst.glob[m_pid]._addr) + dstOffset;
const char * remoteAddr
- = static_cast(src.glob[srcPid].addr) + srcOffset;
+ = static_cast(src.glob[srcPid]._addr) + srcOffset;
sge.addr = reinterpret_cast( localAddr );
sge.length = std::min(size, m_maxMsgSize );
@@ -652,7 +653,7 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset,
sr.num_sge = 1;
sr.opcode = IBV_WR_RDMA_READ;
sr.wr.rdma.remote_addr = reinterpret_cast( remoteAddr );
- sr.wr.rdma.rkey = src.glob[srcPid].rkey;
+ sr.wr.rdma.rkey = src.glob[srcPid]._rkey;
m_srsHeads[ srcPid ] = m_srs.size();
m_srs.push_back( sr );
diff --git a/src/MPI/ibverbs.hpp b/src/MPI/ibverbs.hpp
index f53c9354..b165f777 100644
--- a/src/MPI/ibverbs.hpp
+++ b/src/MPI/ibverbs.hpp
@@ -58,6 +58,23 @@ using std::shared_ptr;
using std::tr1::shared_ptr;
#endif
+class MemoryRegistration {
+ public:
+ char * _addr;
+ size_t _size;
+ uint32_t _lkey;
+ uint32_t _rkey;
+ int _pid;
+ MemoryRegistration(char * addr, size_t size, uint32_t lkey, uint32_t rkey, int pid) : _addr(addr),
+ _size(size), _lkey(lkey), _rkey(rkey), _pid(pid)
+ { }
+ MemoryRegistration() : _addr(nullptr), _size(0), _lkey(0), _rkey(0), _pid(-1) {}
+ size_t serialize(char ** buf);
+ static MemoryRegistration * deserialize(char * buf);
+
+};
+
+
class _LPFLIB_LOCAL IBVerbs
{
public:
@@ -72,6 +89,7 @@ class _LPFLIB_LOCAL IBVerbs
void resizeMesgq( size_t size );
SlotID regLocal( void * addr, size_t size );
+ SlotID regNoc( void * addr, size_t size );
SlotID regGlobal( void * addr, size_t size );
void dereg( SlotID id );
@@ -93,7 +111,7 @@ class _LPFLIB_LOCAL IBVerbs
void doRemoteProgress();
- void countingSyncPerSlot(bool resized, SlotID tag, size_t sent, size_t recvd);
+ void countingSyncPerSlot(SlotID tag, size_t sent, size_t recvd);
/**
* @syncPerSlot only guarantees that all already scheduled sends (via put),
* or receives (via get) associated with a slot are completed. It does
@@ -101,16 +119,18 @@ class _LPFLIB_LOCAL IBVerbs
* no guarantee that a remote process will wait til data is put into its
* memory, as it does schedule the operation (one-sided).
*/
- void syncPerSlot(bool resized, SlotID slot);
+ void syncPerSlot(SlotID slot);
// Do the communication and synchronize
// 'Reconnect' must be a globally replicated value
void sync( bool reconnect);
void get_rcvd_msg_count(size_t * rcvd_msgs);
+ void get_sent_msg_count(size_t * sent_msgs);
void get_rcvd_msg_count_per_slot(size_t * rcvd_msgs, SlotID slot);
void get_sent_msg_count_per_slot(size_t * sent_msgs, SlotID slot);
-private:
+
+protected:
IBVerbs & operator=(const IBVerbs & ); // assignment prohibited
IBVerbs( const IBVerbs & ); // copying prohibited
@@ -123,22 +143,16 @@ class _LPFLIB_LOCAL IBVerbs
void doProgress();
void tryIncrement(Op op, Phase phase, SlotID slot);
- struct MemoryRegistration {
- void * addr;
- size_t size;
- uint32_t lkey;
- uint32_t rkey;
- };
-
struct MemorySlot {
shared_ptr< struct ibv_mr > mr; // verbs structure
std::vector< MemoryRegistration > glob; // array for global registrations
};
+
+ Communication & m_comm;
int m_pid; // local process ID
int m_nprocs; // number of processes
std::atomic_size_t m_numMsgs;
- //std::atomic_size_t m_sendTotalInitMsgCount;
std::atomic_size_t m_recvTotalInitMsgCount;
std::atomic_size_t m_sentMsgs;
std::atomic_size_t m_recvdMsgs;
@@ -157,8 +171,6 @@ class _LPFLIB_LOCAL IBVerbs
size_t m_cqSize;
size_t m_minNrMsgs;
size_t m_maxSrs; // maximum number of sends requests per QP
- size_t m_postCount;
- size_t m_recvCount;
shared_ptr< struct ibv_context > m_device; // device handle
shared_ptr< struct ibv_pd > m_pd; // protection domain
@@ -173,10 +185,6 @@ class _LPFLIB_LOCAL IBVerbs
// Connected queue pairs
std::vector< shared_ptr > m_connectedQps;
- std::vector rcvdMsgCount;
- std::vector sentMsgCount;
- std::vector getMsgCount;
- std::vector slotActive;
std::vector< struct ibv_send_wr > m_srs; // array of send requests
@@ -193,8 +201,13 @@ class _LPFLIB_LOCAL IBVerbs
shared_ptr< struct ibv_mr > m_dummyMemReg; // registration of dummy buffer
std::vector< char > m_dummyBuffer; // dummy receive buffer
-
- Communication & m_comm;
+ //
+ std::vector rcvdMsgCount;
+ std::vector sentMsgCount;
+ std::vector getMsgCount;
+ std::vector slotActive;
+ size_t m_postCount;
+ size_t m_recvCount;
};
diff --git a/src/MPI/ibverbsNoc.cpp b/src/MPI/ibverbsNoc.cpp
new file mode 100644
index 00000000..7f185fc1
--- /dev/null
+++ b/src/MPI/ibverbsNoc.cpp
@@ -0,0 +1,98 @@
+#include "ibverbsNoc.hpp"
+
+namespace lpf
+{
+namespace mpi
+{
+
+ size_t MemoryRegistration :: serialize(char ** buf) {
+ std::stringstream ss;
+ size_t bufSize = sizeof(uintptr_t) + sizeof(size_t) + 2*sizeof(uint32_t) + sizeof(int);
+ *buf = new char[bufSize];
+ char *ptr = *buf;
+ uintptr_t addrAsUintPtr = reinterpret_cast(_addr);
+ memcpy(ptr, &addrAsUintPtr, sizeof(uintptr_t));
+ ptr += sizeof(uintptr_t);
+ memcpy(ptr, &_size, sizeof(size_t));
+ ptr += sizeof(size_t);
+ memcpy(ptr, &_lkey, sizeof(uint32_t));
+ ptr += sizeof(uint32_t);
+ memcpy(ptr, &_rkey, sizeof(uint32_t));
+ ptr += sizeof(uint32_t);
+ memcpy(ptr, &_pid, sizeof(int));
+ return bufSize;
+ }
+
+ MemoryRegistration * MemoryRegistration :: deserialize(char * buf) {
+
+ char * addr;
+ size_t size;
+ uint32_t lkey;
+ uint32_t rkey;
+ uintptr_t addrAsUintPtr;
+ int pid;
+ char * ptr = buf;
+ memcpy(&addrAsUintPtr, ptr, sizeof(uintptr_t));
+ addr = reinterpret_cast(addrAsUintPtr);
+ ptr += sizeof(uintptr_t);
+ memcpy(&size, ptr, sizeof(size_t));
+ ptr += sizeof(size_t);
+ memcpy(&lkey, ptr, sizeof(uint32_t));
+ ptr += sizeof(uint32_t);
+ memcpy(&rkey, ptr, sizeof(uint32_t));
+ ptr += sizeof(uint32_t);
+ memcpy(&pid, ptr, sizeof(int));
+ return new MemoryRegistration(addr, size, lkey, rkey, pid);
+ }
+
+ struct IBVerbsNoc::Exception : std::runtime_error {
+ Exception(const char * what) : std::runtime_error( what ) {}
+ };
+
+ MemoryRegistration IBVerbsNoc :: getMR(SlotID slotId, int pid)
+ {
+ const MemorySlot & slot = m_memreg.lookup( slotId );
+ return slot.glob[pid];
+ }
+
+ void IBVerbsNoc::setMR(SlotID slotId, int pid, MemoryRegistration & mr)
+ {
+ m_memreg.update(slotId).glob[pid] = mr;
+ }
+
+ IBVerbsNoc::IBVerbsNoc(Communication & comm) : IBVerbs(comm)
+ {
+ }
+
+ IBVerbs::SlotID IBVerbsNoc :: regNoc( void * addr, size_t size )
+ {
+ ASSERT( size <= m_maxRegSize );
+
+ MemorySlot slot;
+ if ( size > 0) {
+ LOG(4, "IBVerbsNoc::regLocal: Registering locally memory area at " << addr << " of size " << size );
+ struct ibv_mr * const ibv_mr_new_p = ibv_reg_mr(
+ m_pd.get(), addr, size,
+ IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE
+ );
+ if( ibv_mr_new_p == NULL )
+ slot.mr.reset();
+ else
+ slot.mr.reset( ibv_mr_new_p, ibv_dereg_mr );
+ if (!slot.mr) {
+ LOG(1, "Could not register memory area at "
+ << addr << " of size " << size << " with IB device");
+ throw Exception("Could not register memory area");
+ }
+ }
+ MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid);
+
+ SlotID id = m_memreg.addNocReg( slot );
+ m_memreg.update( id ).glob.resize( m_nprocs );
+ m_memreg.update( id ).glob[m_pid] = local;
+ LOG(4, "Memory area " << addr << " of size " << size << " has been locally registered as NOC slot. Slot = " << id );
+ return id;
+ }
+
+} // namespace mpi
+} // namespace lpf
diff --git a/src/MPI/ibverbsNoc.hpp b/src/MPI/ibverbsNoc.hpp
new file mode 100644
index 00000000..d9ece946
--- /dev/null
+++ b/src/MPI/ibverbsNoc.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "ibverbs.hpp"
+
+namespace lpf
+{
+
+namespace mpi
+{
+ class _LPFLIB_LOCAL IBVerbsNoc : public IBVerbs {
+ public:
+ IBVerbsNoc(Communication & comm);
+ IBVerbs::SlotID regNoc( void * addr, size_t size );
+ MemoryRegistration getMR(SlotID slotId, int pid);
+ void setMR(SlotID slotId, int pid, MemoryRegistration & mr);
+
+ };
+} // namespace mpi
+} // namespace lpf
diff --git a/src/MPI/ibverbsZero.cpp b/src/MPI/ibverbsZero.cpp
index 6f52fa5b..7cec923a 100644
--- a/src/MPI/ibverbsZero.cpp
+++ b/src/MPI/ibverbsZero.cpp
@@ -53,14 +53,20 @@ namespace {
IBVerbs :: IBVerbs( Communication & comm )
- : m_pid( comm.pid() )
+ : m_comm( comm )
+ , m_pid( comm.pid() )
, m_nprocs( comm.nprocs() )
+ , m_numMsgs(0)
+ , m_recvTotalInitMsgCount(0)
+ , m_sentMsgs(0)
+ , m_recvdMsgs(0)
, m_devName()
, m_ibPort( Config::instance().getIBPort() )
, m_gidIdx( Config::instance().getIBGidIndex() )
, m_mtu( getMTU( Config::instance().getIBMTU() ))
, m_maxRegSize(0)
, m_maxMsgSize(0)
+ , m_cqSize(1)
, m_minNrMsgs(0)
, m_maxSrs(0)
, m_device()
@@ -78,14 +84,8 @@ IBVerbs :: IBVerbs( Communication & comm )
, m_memreg()
, m_dummyMemReg()
, m_dummyBuffer()
- , m_comm( comm )
- , m_cqSize(1)
, m_postCount(0)
, m_recvCount(0)
- , m_numMsgs(0)
- , m_recvTotalInitMsgCount(0)
- , m_sentMsgs(0)
- , m_recvdMsgs(0)
{
// arrays instead of hashmap for counters
@@ -260,7 +260,7 @@ IBVerbs :: ~IBVerbs()
inline void IBVerbs :: tryIncrement(Op op, Phase phase, SlotID slot) {
-
+
switch (phase) {
case Phase::INIT:
rcvdMsgCount[slot] = 0;
@@ -306,7 +306,7 @@ inline void IBVerbs :: tryIncrement(Op op, Phase phase, SlotID slot) {
void IBVerbs :: stageQPs( size_t maxMsgs )
{
// create the queue pairs
- for ( int i = 0; i < m_nprocs; ++i) {
+ for ( size_t i = 0; i < static_cast(m_nprocs); ++i) {
struct ibv_qp_init_attr attr;
std::memset(&attr, 0, sizeof(attr));
@@ -321,6 +321,7 @@ void IBVerbs :: stageQPs( size_t maxMsgs )
attr.cap.max_recv_sge = 1;
struct ibv_qp * const ibv_new_qp_p = ibv_create_qp( m_pd.get(), &attr );
+ ASSERT(m_stagedQps.size() > i);
if( ibv_new_qp_p == NULL ) {
m_stagedQps[i].reset();
} else {
@@ -352,7 +353,7 @@ void IBVerbs :: doRemoteProgress() {
pollResult = ibv_poll_cq(m_cqRemote.get(), POLL_BATCH, wcs);
if (pollResult > 0) {
LOG(3, "Process " << m_pid << " signals: I received " << pollResult << " remote messages in doRemoteProgress");
- }
+ }
else if (pollResult < 0)
{
LOG( 1, "Failed to poll IB completion queue" );
@@ -367,10 +368,10 @@ void IBVerbs :: doRemoteProgress() {
<< wcs[i].vendor_err );
}
else {
- LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].src_qp = "<< wcs[i].src_qp);
- LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].slid = "<< wcs[i].slid);
- LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].wr_id = "<< wcs[i].wr_id);
- LOG(2, "Process " << m_pid << " Recv wcs[" << i << "].imm_data = "<< wcs[i].imm_data);
+ LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].src_qp = "<< wcs[i].src_qp);
+ LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].slid = "<< wcs[i].slid);
+ LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].wr_id = "<< wcs[i].wr_id);
+ LOG(3, "Process " << m_pid << " Recv wcs[" << i << "].imm_data = "<< wcs[i].imm_data);
/**
* Here is a trick:
@@ -463,7 +464,6 @@ void IBVerbs :: reconnectQPs()
struct ibv_recv_wr rr; std::memset(&rr, 0, sizeof(rr));
struct ibv_sge sge; std::memset(&sge, 0, sizeof(sge));
- struct ibv_recv_wr *bad_wr = NULL;
sge.addr = reinterpret_cast(m_dummyBuffer.data());
sge.length = m_dummyBuffer.size();
sge.lkey = m_dummyMemReg->lkey;
@@ -553,8 +553,8 @@ void IBVerbs :: resizeMemreg( size_t size )
throw std::bad_alloc() ;
}
- MemoryRegistration null = { 0, 0, 0, 0 };
- MemorySlot dflt; dflt.glob.resize( m_nprocs, null );
+ MemoryRegistration newMR = { nullptr, 0, 0, 0, m_pid};
+ MemorySlot dflt; dflt.glob.resize( m_nprocs, newMR);
m_memreg.reserve( size, dflt );
}
@@ -616,14 +616,10 @@ IBVerbs :: SlotID IBVerbs :: regLocal( void * addr, size_t size )
throw Exception("Could not register memory area");
}
}
- MemoryRegistration local;
- local.addr = addr;
- local.size = size;
- local.lkey = size?slot.mr->lkey:0;
- local.rkey = size?slot.mr->rkey:0;
+ MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid);
SlotID id = m_memreg.addLocalReg( slot );
- tryIncrement(Op::SEND/* <- dummy for init */, Phase::INIT, id);
+ tryIncrement(Op::SEND, Phase::INIT, id);
m_memreg.update( id ).glob.resize( m_nprocs );
m_memreg.update( id ).glob[m_pid] = local;
@@ -662,12 +658,7 @@ IBVerbs :: SlotID IBVerbs :: regGlobal( void * addr, size_t size )
// exchange memory registration info globally
ref.glob.resize(m_nprocs);
- MemoryRegistration local;
- local.addr = addr;
- local.size = size;
- local.lkey = size?slot.mr->lkey:0;
- local.rkey = size?slot.mr->rkey:0;
-
+ MemoryRegistration local((char *) addr, size, size?slot.mr->lkey:0, size?slot.mr->rkey:0, m_pid);
LOG(4, "All-gathering memory register data" );
m_comm.allgather( local, ref.glob.data() );
@@ -694,9 +685,9 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst
const MemorySlot & dst = m_memreg.lookup( dstSlot);
char * localAddr
- = static_cast(src.glob[m_pid].addr) + srcOffset;
+ = static_cast(src.glob[m_pid]._addr) + srcOffset;
const char * remoteAddr
- = static_cast(dst.glob[dstPid].addr) + dstOffset;
+ = static_cast(dst.glob[dstPid]._addr) + dstOffset;
struct ibv_sge sge;
memset(&sge, 0, sizeof(sge));
@@ -704,7 +695,6 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst
sge.length = std::min(size, m_maxMsgSize );
sge.lkey = src.mr->lkey;
- struct ibv_wc wcs[POLL_BATCH];
struct ibv_send_wr wr;
memset(&wr, 0, sizeof(wr));
wr.wr_id = srcSlot;
@@ -716,7 +706,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst
wr.wr.atomic.remote_addr = reinterpret_cast(remoteAddr);
wr.wr.atomic.compare_add = compare_add;
wr.wr.atomic.swap = swap;
- wr.wr.atomic.rkey = dst.glob[dstPid].rkey;
+ wr.wr.atomic.rkey = dst.glob[dstPid]._rkey;
struct ibv_send_wr *bad_wr;
int error;
std::vector opcodes;
@@ -729,7 +719,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst
}
/**
- * Keep waiting on a completion of events until you
+ * Keep waiting on a completion of events until you
* register a completed atomic compare-and-swap
*/
do {
@@ -741,7 +731,7 @@ void IBVerbs :: blockingCompareAndSwap(SlotID srcSlot, size_t srcOffset, int dst
} while (std::find(opcodes.begin(), opcodes.end(), IBV_WC_COMP_SWAP) == opcodes.end());
uint64_t * remoteValueFound = reinterpret_cast(localAddr);
- /*
+ /*
* if we fetched the value we expected, then
* we are holding the lock now (that is, we swapped successfully!)
* else, re-post your request for the lock
@@ -775,9 +765,9 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset,
sge = &sges[i]; std::memset(sge, 0, sizeof(ibv_sge));
sr = &srs[i]; std::memset(sr, 0, sizeof(ibv_send_wr));
const char * localAddr
- = static_cast(src.glob[m_pid].addr) + srcOffset;
+ = static_cast(src.glob[m_pid]._addr) + srcOffset;
const char * remoteAddr
- = static_cast(dst.glob[dstPid].addr) + dstOffset;
+ = static_cast(dst.glob[dstPid]._addr) + dstOffset;
sge->addr = reinterpret_cast( localAddr );
sge->length = std::min(size, m_maxMsgSize );
@@ -791,9 +781,9 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset,
sr->send_flags = lastMsg ? IBV_SEND_SIGNALED : 0;
sr->opcode = lastMsg? IBV_WR_RDMA_WRITE_WITH_IMM : IBV_WR_RDMA_WRITE;
/* use wr_id to later demultiplex srcSlot */
- sr->wr_id = srcSlot;
+ sr->wr_id = srcSlot;
/*
- * In HiCR, we need to know at receiver end which slot
+ * In HiCR, we need to know at receiver end which slot
* has received the message. But here is a trick:
*/
sr->imm_data = dstSlot;
@@ -801,7 +791,7 @@ void IBVerbs :: put( SlotID srcSlot, size_t srcOffset,
sr->sg_list = &sges[i];
sr->num_sge = 1;
sr->wr.rdma.remote_addr = reinterpret_cast( remoteAddr );
- sr->wr.rdma.rkey = dst.glob[dstPid].rkey;
+ sr->wr.rdma.rkey = dst.glob[dstPid]._rkey;
srs[i] = *sr;
size -= sge->length;
@@ -843,9 +833,9 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset,
sr = &srs[i]; std::memset(sr, 0, sizeof(ibv_send_wr));
const char * localAddr
- = static_cast(dst.glob[m_pid].addr) + dstOffset;
+ = static_cast(dst.glob[m_pid]._addr) + dstOffset;
const char * remoteAddr
- = static_cast(src.glob[srcPid].addr) + srcOffset;
+ = static_cast(src.glob[srcPid]._addr) + srcOffset;
sge->addr = reinterpret_cast( localAddr );
sge->length = std::min(size, m_maxMsgSize );
@@ -861,7 +851,7 @@ void IBVerbs :: get( int srcPid, SlotID srcSlot, size_t srcOffset,
sr->num_sge = 1;
sr->opcode = IBV_WR_RDMA_READ;
sr->wr.rdma.remote_addr = reinterpret_cast( remoteAddr );
- sr->wr.rdma.rkey = src.glob[srcPid].rkey;
+ sr->wr.rdma.rkey = src.glob[srcPid]._rkey;
// This logic is reversed compared to ::put
// (not srcSlot, as this slot is remote)
sr->wr_id = dstSlot; // <= DO NOT CHANGE THIS !!!
@@ -890,25 +880,29 @@ void IBVerbs :: get_rcvd_msg_count(size_t * rcvd_msgs) {
*rcvd_msgs = m_recvdMsgs;
}
+void IBVerbs :: get_sent_msg_count(size_t * sent_msgs) {
+ *sent_msgs = m_sentMsgs;
+}
+
void IBVerbs :: get_rcvd_msg_count_per_slot(size_t * rcvd_msgs, SlotID slot)
{
- *rcvd_msgs = rcvdMsgCount[slot];
+ *rcvd_msgs = rcvdMsgCount[slot] + getMsgCount[slot];
}
void IBVerbs :: get_sent_msg_count_per_slot(size_t * sent_msgs, SlotID slot)
{
- *sent_msgs = sentMsgCount.at(slot);
+ *sent_msgs = sentMsgCount[slot];
}
std::vector IBVerbs :: wait_completion(int& error) {
error = 0;
- LOG(5, "Polling for messages" );
+ LOG(1, "Polling for messages" );
struct ibv_wc wcs[POLL_BATCH];
int pollResult = ibv_poll_cq(m_cqLocal.get(), POLL_BATCH, wcs);
std::vector opcodes;
if ( pollResult > 0) {
- LOG(3, "Process " << m_pid << ": Received " << pollResult << " acknowledgements");
+ LOG(4, "Process " << m_pid << ": Received " << pollResult << " acknowledgements");
for (int i = 0; i < pollResult ; ++i) {
if (wcs[i].status != IBV_WC_SUCCESS)
@@ -923,10 +917,10 @@ std::vector IBVerbs :: wait_completion(int& error) {
error = 1;
}
else {
- LOG(3, "Process " << m_pid << " Send wcs[" << i << "].src_qp = "<< wcs[i].src_qp);
- LOG(3, "Process " << m_pid << " Send wcs[" << i << "].slid = "<< wcs[i].slid);
- LOG(3, "Process " << m_pid << " Send wcs[" << i << "].wr_id = "<< wcs[i].wr_id);
- LOG(3, "Process " << m_pid << " Send wcs[" << i << "].imm_data = "<< wcs[i].imm_data);
+ LOG(4, "Process " << m_pid << " Send wcs[" << i << "].src_qp = "<< wcs[i].src_qp);
+ LOG(4, "Process " << m_pid << " Send wcs[" << i << "].slid = "<< wcs[i].slid);
+ LOG(4, "Process " << m_pid << " Send wcs[" << i << "].wr_id = "<< wcs[i].wr_id);
+ LOG(4, "Process " << m_pid << " Send wcs[" << i << "].imm_data = "<< wcs[i].imm_data);
}
SlotID slot = wcs[i].wr_id;
@@ -936,18 +930,20 @@ std::vector IBVerbs :: wait_completion(int& error) {
// This is a get call completing
if (wcs[i].opcode == IBV_WC_RDMA_READ) {
tryIncrement(Op::GET, Phase::POST, slot);
+ LOG(4, "Rank " << m_pid << " with GET, increments getMsgCount to " << getMsgCount[slot] << " for LPF slot " << slot);
}
// This is a put call completing
- if (wcs[i].opcode == IBV_WC_RDMA_WRITE)
+ if (wcs[i].opcode == IBV_WC_RDMA_WRITE) {
tryIncrement(Op::SEND, Phase::POST, slot);
+ LOG(4, "Rank " << m_pid << " with SEND, increments getMsgCount to " << sentMsgCount[slot] << " for LPF slot " << slot);
+ }
- LOG(3, "Rank " << m_pid << " increments sent message count to " << sentMsgCount[slot] << " for LPF slot " << slot);
}
}
}
else if (pollResult < 0)
{
- LOG( 5, "Failed to poll IB completion queue" );
+ LOG( 1, "Failed to poll IB completion queue" );
throw Exception("Poll CQ failure");
}
return opcodes;
@@ -980,10 +976,12 @@ void IBVerbs :: flushSent()
}
-void IBVerbs :: countingSyncPerSlot(bool resized, SlotID slot, size_t expectedSent, size_t expectedRecvd) {
+void IBVerbs :: countingSyncPerSlot(SlotID slot, size_t expectedSent, size_t expectedRecvd) {
- size_t actualRecvd;
- size_t actualSent;
+ bool sentOK = false;
+ bool recvdOK = false;
+ if (expectedSent == 0) sentOK = true;
+ if (expectedRecvd == 0) recvdOK = true;
int error;
if (slotActive[slot]) {
do {
@@ -995,14 +993,25 @@ void IBVerbs :: countingSyncPerSlot(bool resized, SlotID slot, size_t expectedSe
// this call triggers doRemoteProgress
doRemoteProgress();
- } while (
- (rcvdMsgCount[slot] < m_recvInitMsgCount[slot]) ||
- (sentMsgCount[slot] < m_sendInitMsgCount[slot])
- );
+ /*
+ * 1) Are we expecting nothing here (sentOK/recvdOK = true)
+ * 2) do the sent and received messages match our expectations?
+ */
+ sentOK = (sentOK || sentMsgCount[slot] >= expectedSent);
+ // We can receive messages passively (from remote puts) and actively (from our gets)
+ recvdOK = (recvdOK || (rcvdMsgCount[slot] + getMsgCount[slot]) >= expectedRecvd);
+ LOG(4, "PID: " << m_pid << " rcvdMsgCount[" << slot << "] = " << rcvdMsgCount[slot]
+ << " expectedRecvd = " << expectedRecvd
+ << " sentMsgCount[" << slot << "] = " << sentMsgCount[slot]
+ << " expectedSent = " << expectedSent
+ << " m_recvInitMsgCount[" << slot << "] = " << m_recvInitMsgCount[slot]
+ << " m_sendInitMsgCount[" << slot << "] = " << m_sendInitMsgCount[slot]);
+
+ } while (!(sentOK && recvdOK));
}
}
-void IBVerbs :: syncPerSlot(bool resized, SlotID slot) {
+void IBVerbs :: syncPerSlot(SlotID slot) {
int error;
do {
@@ -1034,15 +1043,14 @@ void IBVerbs :: syncPerSlot(bool resized, SlotID slot) {
void IBVerbs :: sync(bool resized)
{
-
- int error = 0;
+ (void) resized;
// flush send queues
flushSent();
// flush receive queues
flushReceived();
- LOG(1, "Process " << m_pid << " will call barrier\n");
+ LOG(4, "Process " << m_pid << " will call barrier at end of sync\n");
m_comm.barrier();
diff --git a/src/MPI/interface.cpp b/src/MPI/interface.cpp
index 80123e58..53203042 100644
--- a/src/MPI/interface.cpp
+++ b/src/MPI/interface.cpp
@@ -75,7 +75,10 @@ void Interface :: initRoot(int *argc, char ***argv)
Interface :: Interface( mpi::Comm machine, Process & subprocess )
try : m_comm( machine )
, m_subprocess( subprocess )
- , m_mesgQueue( m_comm )
+
+//#if defined (LPF_CORE_MPI_USES_zero) || defined (LPF_CORE_MPI_USES_ibverbs)
+ ,m_mesgQueue( m_comm)
+//#endif
, m_aborted( false )
{
if ( machine.allreduceOr( false ) )
@@ -129,6 +132,15 @@ void Interface :: getSentMsgCountPerSlot(size_t * msgs, SlotID slot) {
m_mesgQueue.getSentMsgCountPerSlot(msgs, slot);
}
+
+void Interface :: getRcvdMsgCount(size_t * msgs) {
+ m_mesgQueue.getRcvdMsgCount(msgs);
+}
+
+void Interface :: getSentMsgCount(size_t * msgs) {
+ m_mesgQueue.getSentMsgCount(msgs);
+}
+
void Interface :: flushSent() {
m_mesgQueue.flushSent();
}
@@ -137,10 +149,6 @@ void Interface :: flushReceived() {
m_mesgQueue.flushReceived();
}
-void Interface :: getRcvdMsgCount(size_t * msgs) {
- m_mesgQueue.getRcvdMsgCount(msgs);
-}
-
err_t Interface :: countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd)
{
if ( 0 == m_aborted )
@@ -219,6 +227,51 @@ void Interface :: abort()
#endif
}
+/* start NOC extensions */
+memslot_t Interface :: nocRegister( void * mem, size_t size )
+{
+ return m_mesgQueue.addNocReg( mem, size );
+}
+
+void Interface :: nocDeregister( memslot_t slot)
+{
+ m_mesgQueue.removeReg(slot);
+}
+
+err_t Interface :: nocResizeMemreg( size_t nRegs )
+{
+ return m_mesgQueue.resizeMemreg(nRegs);
+}
+
+void Interface :: nocPut( memslot_t srcSlot, size_t srcOffset,
+ pid_t dstPid, memslot_t dstSlot, size_t dstOffset,
+ size_t size )
+{
+ m_mesgQueue.put( srcSlot, srcOffset,
+ dstPid, dstSlot, dstOffset,
+ size );
+}
+
+void Interface :: nocGet( pid_t srcPid, memslot_t srcSlot, size_t srcOffset,
+ memslot_t dstSlot, size_t dstOffset,
+ size_t size )
+{
+ m_mesgQueue.get( srcPid, srcSlot, srcOffset,
+ dstSlot, dstOffset,
+ size );
+}
+
+err_t Interface :: serializeSlot(SlotID slot, char ** buff, size_t *buff_size)
+{
+ return m_mesgQueue.serializeSlot(slot, buff, buff_size);
+}
+
+err_t Interface :: deserializeSlot(char * buff, SlotID slot)
+{
+ return m_mesgQueue.deserializeSlot(buff, slot);
+}
+/* end NOC extensions */
+
pid_t Interface :: isAborted() const
{
return m_aborted;
diff --git a/src/MPI/interface.hpp b/src/MPI/interface.hpp
index 02e48b3c..004e9edc 100644
--- a/src/MPI/interface.hpp
+++ b/src/MPI/interface.hpp
@@ -61,6 +61,23 @@ class _LPFLIB_LOCAL Interface
err_t resizeMesgQueue( size_t nMsgs ) ; // nothrow
void abort() ; // nothrow
+ //
+ /* start NOC extensions */
+ memslot_t nocRegister( void * mem, size_t size ) ; // nothrow
+ void nocDeregister( memslot_t slot) ; // nothrow
+ err_t nocResizeMemreg( size_t nRegs ) ; // nothrow
+ void nocPut( memslot_t srcSlot, size_t srcOffset,
+ pid_t dstPid, memslot_t dstSlot, size_t dstOffset,
+ size_t size ) ; // nothrow
+
+ void nocGet( pid_t srcPid, memslot_t srcSlot, size_t srcOffset,
+ memslot_t dstSlot, size_t dstOffset,
+ size_t size ) ;// nothrow
+
+ err_t serializeSlot(memslot_t slot, char ** buff, size_t *buff_size);
+
+ err_t deserializeSlot(char * buff, memslot_t slot);
+ /* end NOC extensions */
pid_t isAborted() const ;
@@ -82,6 +99,8 @@ class _LPFLIB_LOCAL Interface
void getSentMsgCountPerSlot(size_t * msgs, SlotID slot);
+ void getSentMsgCount(size_t * msgs);
+
void getRcvdMsgCount(size_t * msgs);
void flushSent();
@@ -108,6 +127,7 @@ class _LPFLIB_LOCAL Interface
mpi::Comm m_comm;
Process & m_subprocess;
MessageQueue m_mesgQueue;
+
pid_t m_aborted;
static Interface * s_root;
diff --git a/src/MPI/memorytable.cpp b/src/MPI/memorytable.cpp
index 51947985..4ebb546d 100644
--- a/src/MPI/memorytable.cpp
+++ b/src/MPI/memorytable.cpp
@@ -23,8 +23,10 @@
namespace lpf {
MemoryTable :: MemoryTable( Communication & comm
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+#if defined LPF_CORE_MPI_USES_ibverbs
, mpi::IBVerbs & ibverbs
+#elif defined LPF_CORE_MPI_USES_zero
+ , mpi::IBVerbsNoc & ibverbs
#endif
)
: m_memreg()
@@ -42,6 +44,17 @@ MemoryTable :: MemoryTable( Communication & comm
{ (void) comm; }
+MemoryTable :: Slot
+MemoryTable :: addNoc( void * mem, std::size_t size ) // nothrow
+{
+#if defined LPF_CORE_MPI_USES_zero
+ Memory rec( mem, size, m_ibverbs.regNoc(mem, size));
+ return m_memreg.addNocReg( rec);
+#else
+ return m_memreg.invalidSlot();
+#endif
+}
+
MemoryTable :: Slot
MemoryTable :: addLocal( void * mem, std::size_t size ) // nothrow
{
@@ -53,6 +66,15 @@ MemoryTable :: addLocal( void * mem, std::size_t size ) // nothrow
return m_memreg.addLocalReg( rec);
}
+#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+mpi::IBVerbs::SlotID MemoryTable :: getVerbID(MemoryTable::Slot slot) const
+{
+ Memory sl = m_memreg.lookup(slot);
+ ASSERT(sl.slot != m_memreg.invalidSlot());
+ return m_memreg.lookup( slot ).slot;
+}
+#endif
+
MemoryTable :: Slot
MemoryTable :: addGlobal( void * mem, std::size_t size ) // nothrow
{
diff --git a/src/MPI/memorytable.hpp b/src/MPI/memorytable.hpp
index 05c01eee..1308aa33 100644
--- a/src/MPI/memorytable.hpp
+++ b/src/MPI/memorytable.hpp
@@ -23,8 +23,10 @@
#include "assert.hpp"
#include "linkage.hpp"
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+#if defined LPF_CORE_MPI_USES_ibverbs
#include "ibverbs.hpp"
+#elif defined LPF_CORE_MPI_USES_zero
+#include "ibverbsNoc.hpp"
#endif
@@ -64,14 +66,18 @@ class _LPFLIB_LOCAL MemoryTable
static Slot invalidSlot()
{ return Register::invalidSlot(); }
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+#if defined LPF_CORE_MPI_USES_ibverbs
explicit MemoryTable( Communication & comm, mpi::IBVerbs & verbs );
+#elif defined LPF_CORE_MPI_USES_zero
+ explicit MemoryTable( Communication & comm, mpi::IBVerbsNoc & verbs );
#else
explicit MemoryTable( Communication & comm );
#endif
Slot addLocal( void * mem, std::size_t size ) ; // nothrow
+ Slot addNoc( void * mem, std::size_t size ) ; // nothrow
+
Slot addGlobal( void * mem, std::size_t size ); // nothrow
void remove( Slot slot ); // nothrow
@@ -90,8 +96,7 @@ class _LPFLIB_LOCAL MemoryTable
#endif
#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
- mpi::IBVerbs::SlotID getVerbID( Slot slot ) const
- { return m_memreg.lookup( slot ).slot; }
+ mpi::IBVerbs::SlotID getVerbID( Slot slot ) const;
#endif
void reserve( size_t size ); // throws bad_alloc, strong safe
@@ -117,9 +122,13 @@ class _LPFLIB_LOCAL MemoryTable
Communication & m_comm;
#endif
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+#if defined LPF_CORE_MPI_USES_ibverbs
+ DirtyList m_added;
+ mpi::IBVerbs & m_ibverbs;
+ Communication & m_comm;
+#elif defined LPF_CORE_MPI_USES_zero
DirtyList m_added;
- mpi::IBVerbs & m_ibverbs;
+ mpi::IBVerbsNoc & m_ibverbs;
Communication & m_comm;
#endif
};
diff --git a/src/MPI/mesgqueue.cpp b/src/MPI/mesgqueue.cpp
index f81a618a..0c0f05f2 100644
--- a/src/MPI/mesgqueue.cpp
+++ b/src/MPI/mesgqueue.cpp
@@ -16,6 +16,7 @@
*/
#include "mesgqueue.hpp"
+#include "ibverbs.hpp"
#include "mpilib.hpp"
#include "log.hpp"
#include "assert.hpp"
@@ -103,13 +104,13 @@ MessageQueue :: MessageQueue( Communication & comm )
, m_bodySends()
, m_bodyRecvs()
, m_comm( dynamic_cast(comm) )
+ , m_tinyMsgBuf( m_tinyMsgSize + largestHeader(m_nprocs, m_memRange, 0, 0))
#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
- , m_ibverbs( m_comm )
+ , m_ibverbs(m_comm)
, m_memreg( m_comm, m_ibverbs )
#else
, m_memreg( m_comm )
#endif
- , m_tinyMsgBuf( m_tinyMsgSize + largestHeader(m_nprocs, m_memRange, 0, 0))
{
m_memreg.reserve(1); // reserve slot for edgeBuffer
}
@@ -243,6 +244,48 @@ err_t MessageQueue :: resizeMemreg( size_t nRegs )
return LPF_SUCCESS;
}
+
+memslot_t MessageQueue :: addNocReg( void * mem, std::size_t size)
+{
+ memslot_t slot = m_memreg.addNoc( mem, size );
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
+ if (size > 0)
+ m_msgsort.addRegister( slot, static_cast( mem ), size);
+ return slot;
+}
+
+err_t MessageQueue :: serializeSlot(memslot_t slot, char ** mem, std::size_t * size)
+{
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
+ ASSERT(mem != nullptr);
+ ASSERT(size != nullptr);
+#ifdef LPF_CORE_MPI_USES_zero
+ auto mr = m_ibverbs.getMR(m_memreg.getVerbID(slot), m_pid);
+ *size = mr.serialize(mem);
+ return LPF_SUCCESS;
+#else
+ LOG( 3, "Error: serialize slot is only implemented for zero engine at the moment.");
+ return LPF_ERR_FATAL;
+#endif
+
+}
+
+err_t MessageQueue :: deserializeSlot(char * mem, memslot_t slot)
+{
+ ASSERT(mem != nullptr);
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
+#ifdef LPF_CORE_MPI_USES_zero
+ auto mr = mpi::MemoryRegistration::deserialize(mem);
+ m_ibverbs.setMR(m_memreg.getVerbID(slot), mr->_pid, *mr);
+ return LPF_SUCCESS;
+#else
+ LOG( 3, "Error: deserialize slot is only implemented for zero engine at the moment.");
+ return LPF_ERR_FATAL;
+#endif
+
+}
+
+
memslot_t MessageQueue :: addLocalReg( void * mem, std::size_t size)
{
memslot_t slot = m_memreg.addLocal( mem, size );
@@ -324,6 +367,12 @@ void MessageQueue :: get( pid_t srcPid, memslot_t srcSlot, size_t srcOffset,
void MessageQueue :: lockSlot( memslot_t srcSlot, size_t srcOffset,
pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size )
{
+ ASSERT(srcSlot != LPF_INVALID_MEMSLOT);
+ ASSERT(dstSlot != LPF_INVALID_MEMSLOT);
+ (void) srcOffset;
+ (void) dstOffset;
+ (void) dstPid;
+ (void) size;
#ifdef LPF_CORE_MPI_USES_zero
m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid, m_memreg.getVerbID(dstSlot), dstOffset, size, 0ULL, 1ULL);
#endif
@@ -332,6 +381,12 @@ m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid,
void MessageQueue :: unlockSlot( memslot_t srcSlot, size_t srcOffset,
pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size )
{
+ ASSERT(srcSlot != LPF_INVALID_MEMSLOT);
+ ASSERT(dstSlot != LPF_INVALID_MEMSLOT);
+ (void) srcOffset;
+ (void) dstOffset;
+ (void) dstPid;
+ (void) size;
#ifdef LPF_CORE_MPI_USES_zero
m_ibverbs.blockingCompareAndSwap(m_memreg.getVerbID(srcSlot), srcOffset, dstPid, m_memreg.getVerbID(dstSlot), dstOffset, size, 1ULL, 0ULL);
#endif
@@ -389,6 +444,7 @@ int MessageQueue :: sync( bool abort )
{
#ifdef LPF_CORE_MPI_USES_zero
// if not, deal with normal sync
+ (void) abort;
m_memreg.sync();
m_ibverbs.sync(m_resized);
m_resized = false;
@@ -1018,32 +1074,33 @@ int MessageQueue :: sync( bool abort )
}
-int MessageQueue :: countingSyncPerSlot(SlotID slot, size_t expected_sent, size_t expected_rcvd)
+int MessageQueue :: countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd)
{
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
+ (void) expected_sent;
+ (void) expected_rcvd;
#ifdef LPF_CORE_MPI_USES_zero
// if not, deal with normal sync
m_memreg.sync();
-
- m_ibverbs.countingSyncPerSlot(m_resized, slot, expected_sent, expected_rcvd);
-
+ m_ibverbs.countingSyncPerSlot(m_memreg.getVerbID(slot), expected_sent, expected_rcvd);
m_resized = false;
+
#endif
return 0;
}
-int MessageQueue :: syncPerSlot(SlotID slot)
+int MessageQueue :: syncPerSlot(memslot_t slot)
{
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
#ifdef LPF_CORE_MPI_USES_zero
// if not, deal with normal sync
m_memreg.sync();
-
- m_ibverbs.syncPerSlot(m_resized, slot);
-
+ m_ibverbs.syncPerSlot(m_memreg.getVerbID(slot));
m_resized = false;
#endif
@@ -1051,28 +1108,41 @@ int MessageQueue :: syncPerSlot(SlotID slot)
}
-void MessageQueue :: getRcvdMsgCountPerSlot(size_t * msgs, SlotID slot)
+void MessageQueue :: getRcvdMsgCountPerSlot(size_t * msgs, memslot_t slot)
{
+ ASSERT(msgs != nullptr);
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
#ifdef LPF_CORE_MPI_USES_zero
*msgs = 0;
- m_ibverbs.get_rcvd_msg_count_per_slot(msgs, slot);
+ m_ibverbs.get_rcvd_msg_count_per_slot(msgs, m_memreg.getVerbID(slot));
#endif
}
void MessageQueue :: getRcvdMsgCount(size_t * msgs)
{
+ ASSERT(msgs != nullptr);
#ifdef LPF_CORE_MPI_USES_zero
*msgs = 0;
- m_ibverbs.get_rcvd_msg_count(msgs);
+ m_ibverbs.get_rcvd_msg_count(msgs);
#endif
}
-void MessageQueue :: getSentMsgCountPerSlot(size_t * msgs, SlotID slot)
+void MessageQueue :: getSentMsgCount(size_t * msgs)
+{
+ ASSERT(msgs != nullptr);
+#ifdef LPF_CORE_MPI_USES_zero
+ *msgs = 0;
+ m_ibverbs.get_sent_msg_count(msgs);
+#endif
+}
+void MessageQueue :: getSentMsgCountPerSlot(size_t * msgs, memslot_t slot)
{
+ ASSERT(msgs != nullptr);
+ ASSERT(slot != LPF_INVALID_MEMSLOT);
#ifdef LPF_CORE_MPI_USES_zero
*msgs = 0;
- m_ibverbs.get_sent_msg_count_per_slot(msgs, slot);
+ m_ibverbs.get_sent_msg_count_per_slot(msgs, m_memreg.getVerbID(slot));
#endif
}
diff --git a/src/MPI/mesgqueue.hpp b/src/MPI/mesgqueue.hpp
index b4f1f796..198afa04 100644
--- a/src/MPI/mesgqueue.hpp
+++ b/src/MPI/mesgqueue.hpp
@@ -26,6 +26,9 @@
#include "messagesort.hpp"
#include "mpilib.hpp"
#include "linkage.hpp"
+#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+#include "ibverbsNoc.hpp"
+#endif
#if __cplusplus >= 201103L
#include
@@ -33,12 +36,7 @@
#include
#endif
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
-#include "ibverbs.hpp"
-#endif
-
//only for HiCR
-typedef size_t SlotID;
namespace lpf {
@@ -53,7 +51,9 @@ class _LPFLIB_LOCAL MessageQueue
memslot_t addLocalReg( void * mem, std::size_t size );
+
memslot_t addGlobalReg( void * mem, std::size_t size );
+
void removeReg( memslot_t slot );
void get( pid_t srcPid, memslot_t srcSlot, size_t srcOffset,
@@ -67,31 +67,36 @@ class _LPFLIB_LOCAL MessageQueue
int sync( bool abort );
//only for HiCR
-//#ifdef
void lockSlot( memslot_t srcSlot, size_t srcOffset,
pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size );
void unlockSlot( memslot_t srcSlot, size_t srcOffset,
pid_t dstPid, memslot_t dstSlot, size_t dstOffset, size_t size );
- void getRcvdMsgCountPerSlot(size_t * msgs, SlotID slot);
+ void getRcvdMsgCountPerSlot(size_t * msgs, memslot_t slot);
void getRcvdMsgCount(size_t * msgs);
- void getSentMsgCountPerSlot(size_t * msgs, SlotID slot);
+ void getSentMsgCountPerSlot(size_t * msgs, memslot_t slot);
+
+ void getSentMsgCount(size_t * msgs);
void flushSent();
void flushReceived();
- int countingSyncPerSlot(SlotID slot, size_t expected_sent, size_t expected_rcvd);
+ int countingSyncPerSlot(memslot_t slot, size_t expected_sent, size_t expected_rcvd);
+
+ int syncPerSlot(memslot_t slot);
+ // NOC extensions
+ memslot_t addNocReg( void * mem, std::size_t size );
- int syncPerSlot(SlotID slot);
+ err_t serializeSlot(memslot_t slot, char ** buff, std::size_t * buff_size);
+ err_t deserializeSlot(char * buff, memslot_t slot);
// end only for HiCR
-//#endif
private:
- enum Msgs { BufPut ,
+ enum Msgs { BufPut ,
BufGet, BufGetReply,
HpPut, HpGet , HpBodyReply ,
HpEdges, HpEdgesReply };
@@ -100,7 +105,7 @@ class _LPFLIB_LOCAL MessageQueue
SrcPid, DstPid,
SrcOffset, DstOffset, BufOffset,
SrcSlot, DstSlot, Size,
- RoundedDstOffset, RoundedSize,
+ RoundedDstOffset, RoundedSize,
Payload, Head, Tail};
struct Edge {
@@ -160,11 +165,14 @@ class _LPFLIB_LOCAL MessageQueue
std::vector< Body > m_bodySends;
std::vector< Body > m_bodyRecvs;
mpi::Comm m_comm;
-#if defined LPF_CORE_MPI_USES_ibverbs || defined LPF_CORE_MPI_USES_zero
+ std::vector< char > m_tinyMsgBuf;
+protected:
+#if defined LPF_CORE_MPI_USES_ibverbs
mpi::IBVerbs m_ibverbs;
+#elif defined LPF_CORE_MPI_USES_zero
+ mpi::IBVerbsNoc m_ibverbs;
#endif
MemoryTable m_memreg;
- std::vector< char > m_tinyMsgBuf;
};
diff --git a/src/common/memreg.hpp b/src/common/memreg.hpp
index f48d519c..9e6c4b87 100644
--- a/src/common/memreg.hpp
+++ b/src/common/memreg.hpp
@@ -211,6 +211,7 @@ class CombinedMemoryRegister
void destroy() {
m_local.destroy();
m_global.destroy();
+ m_noc.destroy();
}
Slot addLocalReg( Record record ) // nothrow
@@ -218,6 +219,12 @@ class CombinedMemoryRegister
return toLocal( m_local.add( record ) );
}
+ Slot addNocReg( Record record ) // nothrow
+ {
+ Slot a = toNoc( m_noc.add( record ) );
+ return a;
+ }
+
Slot addGlobalReg( Record record ) // nothrow
{
return toGlobal( m_global.add(record) );
@@ -227,24 +234,31 @@ class CombinedMemoryRegister
{
if (isLocalSlot(slot))
m_local.remove( fromLocal(slot) ) ;
- else
+ else if (isGlobalSlot(slot))
m_global.remove( fromGlobal( slot ) );
+ else
+ m_noc.remove(fromNoc( slot ) );
}
const Record & lookup( Slot slot ) const // nothrow
{
if (isLocalSlot(slot))
return m_local.lookup( fromLocal(slot));
- else
+ else if (isGlobalSlot(slot))
return m_global.lookup( fromGlobal( slot ));
+ else {// isNocSlot(slot) == true
+ return m_noc.lookup( fromNoc( slot ));
+ }
}
Record & update( Slot slot ) // nothrow
{
if (isLocalSlot(slot))
return m_local.update( fromLocal(slot));
- else
+ else if (isGlobalSlot(slot))
return m_global.update( fromGlobal( slot ));
+ else // noc Slot
+ return m_noc.update(fromNoc(slot));
}
void reserve( size_t size, const Record & defaultRecord = Record() )
@@ -252,36 +266,50 @@ class CombinedMemoryRegister
{
m_global.reserve( size, defaultRecord );
m_local.reserve( size, defaultRecord );
+ m_noc.reserve( size, defaultRecord );
}
size_t capacity( ) const
{
- return std::min( m_global.capacity(), m_local.capacity() );
+ return std::min(std::min( m_global.capacity(), m_local.capacity()), m_noc.capacity() );
}
size_t range() const
{
- return std::max( 2*m_global.capacity(), 2*m_local.capacity()+1 );
+ return std::max(std::max( 3*m_global.capacity(), 3*m_local.capacity()+1), 3*m_noc.capacity()+2);
}
static bool isLocalSlot( Slot slot )
- { return slot % 2 == 1; }
+ { return slot % 3 == 1; }
+
+ static bool isGlobalSlot( Slot slot )
+ { return slot % 3 == 0; }
+
+ static bool isNocSlot( Slot slot )
+ { return slot % 3 == 2; }
private:
static Slot fromGlobal( Slot slot )
- { return slot / 2; }
+ { return slot / 3; }
static Slot fromLocal( Slot slot )
- { return (slot - 1) / 2; }
+ { return (slot - 1) / 3; }
+
+ static Slot fromNoc( Slot slot )
+ { return (slot - 2) / 3; }
static Slot toGlobal( Slot slot )
- { return 2*slot; }
+ { return 3*slot; }
static Slot toLocal( Slot slot )
- { return 2*slot + 1; }
+ { return 3*slot + 1; }
+
+ static Slot toNoc( Slot slot )
+ { return 3*slot + 2; }
MemoryRegister m_local;
MemoryRegister m_global;
+ MemoryRegister m_noc;
};
} // namespace lpf
diff --git a/src/debug/CMakeLists.txt b/src/debug/CMakeLists.txt
index 28af2937..7f3f9c92 100644
--- a/src/debug/CMakeLists.txt
+++ b/src/debug/CMakeLists.txt
@@ -25,8 +25,8 @@ add_library( ${libname}
rwconflict.cpp
$
)
-target_link_libraries( ${libname} ${LIB_POSIX_THREADS})
-target_include_directories( ${libname} PRIVATE ${MPI_C_INCLUDE_PATH})
+target_link_libraries(${libname} ${LIB_POSIX_THREADS})
+target_include_directories(${libname} PRIVATE ${MPI_C_INCLUDE_PATH})
set_target_properties(${libname} PROPERTIES
SOVERSION ${SOVERSION}
)
@@ -35,6 +35,7 @@ install(TARGETS ${libname} EXPORT lpf
RUNTIME DESTINATION ${INSTALL_BIN}
LIBRARY DESTINATION ${INSTALL_LIB}
ARCHIVE DESTINATION ${INSTALL_LIB}
- )
+)
-add_gtest(rwconflict_test "pthread" ON rwconflict.t.cpp rwconflict.cpp)
+add_gtest(rwconflict_test "pthread" rwconflict.t.cpp rwconflict.cpp)
+ #$ )
diff --git a/src/debug/core.cpp b/src/debug/core.cpp
index 00f025f6..c3d0adec 100644
--- a/src/debug/core.cpp
+++ b/src/debug/core.cpp
@@ -16,6 +16,7 @@
*/
#include "debug/lpf/core.h"
+#include "lpf/abort.h"
#undef lpf_get
#undef lpf_put
@@ -29,12 +30,6 @@
#undef lpf_exec
#undef lpf_hook
#undef lpf_rehook
-#undef lpf_abort
-#undef lpf_get_rcvd_msg_count
-#undef lpf_get_rcvd_msg_count_per_slot
-#undef lpf_get_sent_msg_count_per_slot
-#undef lpf_flush
-#undef lpf_abort
#undef lpf_init_t
#undef lpf_pid_t
@@ -62,6 +57,7 @@
#undef LPF_NONE
#undef LPF_INIT_NONE
#undef LPF_NO_ARGS
+#undef LPF_HAS_ABORT
#if __cplusplus >= 201103L
#include
@@ -105,9 +101,21 @@ class _LPFLIB_LOCAL Interface {
}
static void threadInit() {
+ // in the below we use std::abort as these are critical *internal*
+ // errors, not errors in the use of LPF core functionality.
+ // By contrast, errors that appear due to misuse of the LPF core primitives
+ // should call lpf_abort. This initialiser ensures that the underlying LPF
+ // engine has support for lpf_abort.
+ // The above logic about when to std::abort and when to lpf_abort is applied
+ // consistently in the below implementation. Only (seemingly) exceptions will
+ // be documented henceforth.
int rc = pthread_key_create( &s_threadKeyCtxStore, &destroyCtxStore );
if (rc) {
- LOG( 0, "Internal error while initializing thread static storage");
+ LOG( 0, "Internal error while initializing thread static storage" );
+ std::abort();
+ }
+ if( ! LPF_HAS_ABORT ) {
+ LOG( 0, "Debug layer relies on lpf_abort, but selected engine does not support it" );
std::abort();
}
}
@@ -491,6 +499,13 @@ class _LPFLIB_LOCAL Interface {
static lpf_err_t hook( const char * file, int line,
lpf_init_t init, lpf_spmd_t spmd, lpf_args_t args )
{
+ // the lpf_hook could arise from any non-LPF context -- this is in fact
+ // why it exists: hooking from within an LPF context to create a subcontext is
+ // provided by lpf_rehook instead.
+ // Because the callee context is potentially not controlled by the underlying
+ // LPF engine, and because the callee context in the non-trivial case consists
+ // of multiple distributed processes, we cannot rely on lpf_abort. The only
+ // thing we can do is rely on the standard abort.
if ( spmd == NULL ) {
LOG( 0, file << ":" << line
<< ": Invalid argument passed to lpf_hook: NULL spmd argument" );
@@ -703,18 +718,6 @@ class _LPFLIB_LOCAL Interface {
return LPF_SUCCESS;
}
- lpf_err_t get_rcvd_msg_count_per_slot(size_t *msgs, lpf_memslot_t slot) {
- return LPF_SUCCESS;
- }
-
- lpf_err_t get_sent_msg_count_per_slot(size_t *msgs, lpf_memslot_t slot) {
- return LPF_SUCCESS;
- }
-
- lpf_err_t get_rcvd_msg_count(size_t *msgs) {
- return LPF_SUCCESS;
- }
-
lpf_err_t register_local( const char * file, int line,
void * pointer, size_t size, lpf_memslot_t * memslot )
{
@@ -1023,7 +1026,6 @@ class _LPFLIB_LOCAL Interface {
return LPF_SUCCESS;
}
-
lpf_err_t abort(const char * file, int line) {
(void) file;
(void) line;
diff --git a/src/hybrid/CMakeLists.txt b/src/hybrid/CMakeLists.txt
index c2a87b14..ea1a3885 100644
--- a/src/hybrid/CMakeLists.txt
+++ b/src/hybrid/CMakeLists.txt
@@ -20,8 +20,10 @@ if (HYBRID_ENGINE_ENABLED)
set(LPF_IMPL_ID hybrid)
set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME})
-set(LPFLIB_HYBRID_MPI_ENGINE "ibverbs" CACHE STRING
- "Choice of MPI engine to use for inter-process communication")
+if( NOT DEFINED LPFLIB_HYBRID_MPI_ENGINE )
+ message( FATAL_ERROR "Hybrid engine is enabled but no inter-node engine was selected" )
+endif()
+
set(mpi_engine "${LPFLIB_HYBRID_MPI_ENGINE}" )
message( STATUS "Hybrid implementation's multi-node layer is '${mpi_engine}'")
diff --git a/src/hybrid/core.cpp b/src/hybrid/core.cpp
index 39226a18..16b738d6 100644
--- a/src/hybrid/core.cpp
+++ b/src/hybrid/core.cpp
@@ -37,6 +37,10 @@
extern "C" {
+// the value 2 in this implementation indicates support for lpf_abort in a way
+// that may deviate from the stdlib abort()
+_LPFLIB_VAR const int LPF_HAS_ABORT = 2;
+
_LPFLIB_VAR const lpf_err_t LPF_SUCCESS = 0;
_LPFLIB_VAR const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1;
_LPFLIB_VAR const lpf_err_t LPF_ERR_FATAL = 2;
@@ -414,11 +418,15 @@ _LPFLIB_API lpf_err_t lpf_get_rcvd_msg_count( lpf_t ctx, size_t * rcvd_msgs)
_LPFLIB_API lpf_err_t lpf_get_rcvd_msg_count_per_slot( lpf_t ctx, size_t * rcvd_msgs, lpf_memslot_t slot )
{
+
using namespace lpf::hybrid;
+ if (ctx == LPF_SINGLE_PROCESS)
+ return LPF_SUCCESS;
ThreadState * t = realContext(ctx);
- MPI mpi = t->nodeState().mpi();
- mpi.abort();
- return LPF_SUCCESS;
+ if (!t->error())
+ return t->getRcvdMsgCountPerSlot(rcvd_msgs, slot);
+ else
+ return LPF_SUCCESS;
}
_LPFLIB_API lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_msgs, lpf_memslot_t slot )
@@ -428,7 +436,19 @@ _LPFLIB_API lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t ctx, size_t * sent_
return LPF_SUCCESS;
ThreadState * t = realContext(ctx);
if (!t->error())
- return t->getSentMsgCount(sent_msgs, slot);
+ return t->getSentMsgCountPerSlot(sent_msgs, slot);
+ else
+ return LPF_SUCCESS;
+}
+
+_LPFLIB_API lpf_err_t lpf_get_sent_msg_count( lpf_t ctx, size_t * sent_msgs)
+{
+ using namespace lpf::hybrid;
+ if (ctx == LPF_SINGLE_PROCESS)
+ return LPF_SUCCESS;
+ ThreadState * t = realContext(ctx);
+ if (!t->error())
+ return t->getSentMsgCount(sent_msgs);
else
return LPF_SUCCESS;
}
diff --git a/src/hybrid/dispatch.hpp b/src/hybrid/dispatch.hpp
index 2dc83c2b..e9f6b6b6 100644
--- a/src/hybrid/dispatch.hpp
+++ b/src/hybrid/dispatch.hpp
@@ -19,23 +19,29 @@
#define LPF_CORE_HYBRID_DISPATCH_HPP
#undef LPFLIB_CORE_H
+#undef LPFLIB_ABORT_H
#define LPF_CORE_STATIC_DISPATCH
#define LPF_CORE_STATIC_DISPATCH_ID pthread
#define LPF_CORE_STATIC_DISPATCH_CONFIG LPF_CORE_IMPL_CONFIG
#include
+#include
#undef LPF_CORE_STATIC_DISPATCH_ID
#undef LPF_CORE_STATIC_DISPATCH_CONFIG
#undef LPFLIB_CORE_H
+#undef LPFLIB_ABORT_H
#define LPF_CORE_STATIC_DISPATCH_ID LPF_CORE_MULTI_NODE_ENGINE
#define LPF_CORE_STATIC_DISPATCH_CONFIG LPF_CORE_IMPL_CONFIG
#include
+#include
#undef LPF_CORE_STATIC_DISPATCH_ID
#undef LPF_CORE_STATIC_DISPATCH_CONFIG
#undef LPFLIB_CORE_H
+#undef LPFLIB_ABORT_H
#undef LPF_CORE_STATIC_DISPATCH
#include
+#include
#define USE_THREAD( symbol ) \
LPF_RENAME_PRIMITIVE4( lpf, pthread, LPF_CORE_IMPL_CONFIG, symbol )
@@ -121,6 +127,9 @@ namespace lpf { namespace hybrid {
err_t get_rcvd_msg_count( size_t * rcvd_msgs)
{ return USE_THREAD( get_rcvd_msg_count)(m_ctx, rcvd_msgs); }
+ err_t get_sent_msg_count( size_t * sent_msgs)
+ { return USE_THREAD( get_sent_msg_count)(m_ctx, sent_msgs); }
+
err_t flush_sent()
{ return USE_THREAD(flush_sent)(m_ctx); }
@@ -223,15 +232,18 @@ namespace lpf { namespace hybrid {
err_t deregister( memslot_t memslot)
{ return USE_MPI( deregister)(m_ctx, memslot); }
+ err_t get_rcvd_msg_count( size_t * rcvd_msgs)
+ { return USE_MPI( get_rcvd_msg_count)(m_ctx, rcvd_msgs); }
+
err_t get_rcvd_msg_count_per_slot(size_t *rcvd_msgs, lpf_memslot_t slot)
{ return USE_MPI( get_rcvd_msg_count_per_slot)( m_ctx, rcvd_msgs, slot); }
+ err_t get_sent_msg_count( size_t * sent_msgs)
+ { return USE_MPI( get_sent_msg_count)(m_ctx, sent_msgs); }
+
err_t get_sent_msg_count_per_slot(size_t *sent_msgs, lpf_memslot_t slot)
{ return USE_MPI( get_sent_msg_count_per_slot)( m_ctx, sent_msgs, slot); }
- err_t get_rcvd_msg_count( size_t * rcvd_msgs)
- { return USE_MPI( get_rcvd_msg_count)(m_ctx, rcvd_msgs); }
-
err_t flush_sent()
{return USE_MPI( flush_sent)(m_ctx);}
diff --git a/src/hybrid/state.hpp b/src/hybrid/state.hpp
index 06e8faf3..f890be6b 100644
--- a/src/hybrid/state.hpp
+++ b/src/hybrid/state.hpp
@@ -111,13 +111,6 @@ class _LPFLIB_LOCAL NodeState {
return m_mpi.sync();
}
-// MPI::err_t counting_sync_per_slot(lpf_memslot_t slot, size_t expected_sent, size_t expected_rcvd)
-// {
-// m_memreg.flush( m_mpi );
-// m_msgQueue.flush( m_mpi, m_memreg );
-// return m_mpi.counting_sync_per_slot(slot, expected_sent, expected_rcvd);
-// }
-
static double messageGap( lpf_pid_t nprocs, size_t minMsgSize, lpf_sync_attr_t attr)
{
(void) nprocs;
@@ -422,14 +415,19 @@ class _LPFLIB_LOCAL ThreadState {
bool error() const { return m_error; }
- lpf_pid_t getRcvdMsgCount(size_t * rcvd_msgs, lpf_memslot_t slot) {
+ lpf_pid_t getRcvdMsgCountPerSlot(size_t * rcvd_msgs, lpf_memslot_t slot) {
return m_nodeState.mpi().get_rcvd_msg_count_per_slot(rcvd_msgs, slot);
}
- lpf_pid_t getSentMsgCount(size_t * sent_msgs, lpf_memslot_t slot) {
+ lpf_pid_t getSentMsgCountPerSlot(size_t * rcvd_msgs, lpf_memslot_t slot) {
+
+ return m_nodeState.mpi().get_sent_msg_count_per_slot(rcvd_msgs, slot);
+ }
+
+ lpf_pid_t getSentMsgCount(size_t * sent_msgs) {
- return m_nodeState.mpi().get_sent_msg_count_per_slot(sent_msgs, slot);
+ return m_nodeState.mpi().get_sent_msg_count(sent_msgs);
}
lpf_pid_t getRcvdMsgCount(size_t * rcvd_msgs) {
diff --git a/src/imp/core.c b/src/imp/core.c
index 7b4c3db2..994a18fd 100644
--- a/src/imp/core.c
+++ b/src/imp/core.c
@@ -16,12 +16,15 @@
*/
#include
+#include
#include
#include
#include
#include
+const int LPF_HAS_ABORT = 0;
+
const lpf_err_t LPF_SUCCESS = 0;
const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1;
const lpf_err_t LPF_ERR_FATAL = 2;
@@ -141,6 +144,9 @@ lpf_err_t lpf_counting_sync_per_slot( lpf_t lpf, lpf_sync_attr_t attr, lpf_memsl
{
(void) lpf;
(void) attr;
+ (void) slot;
+ (void) expected_sent;
+ (void) expected_rcvd;
return LPF_SUCCESS;
}
@@ -154,6 +160,15 @@ lpf_err_t lpf_lock_slot(
size_t size,
lpf_msg_attr_t attr
) {
+
+ (void) ctx;
+ (void) src_slot;
+ (void) src_offset;
+ (void) dst_pid;
+ (void) dst_slot;
+ (void) dst_offset;
+ (void) size;
+ (void) attr;
return LPF_SUCCESS;
}
@@ -167,6 +182,14 @@ lpf_err_t lpf_unlock_slot(
size_t size,
lpf_msg_attr_t attr
) {
+ (void) ctx;
+ (void) src_slot;
+ (void) src_offset;
+ (void) dst_pid;
+ (void) dst_slot;
+ (void) dst_offset;
+ (void) size;
+ (void) attr;
return LPF_SUCCESS;
}
@@ -215,17 +238,26 @@ lpf_err_t lpf_resize_memory_register( lpf_t lpf, size_t max_regs )
lpf_err_t lpf_get_rcvd_msg_count_per_slot( lpf_t lpf, size_t * rcvd_msgs, lpf_memslot_t slot) {
(void) lpf;
*rcvd_msgs = 0;
+ (void) slot;
return LPF_SUCCESS;
}
lpf_err_t lpf_get_rcvd_msg_count( lpf_t lpf, size_t * rcvd_msgs) {
(void) lpf;
+ *rcvd_msgs = 0;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_get_sent_msg_count( lpf_t lpf, size_t * sent_msgs) {
+ (void) lpf;
+ *sent_msgs = 0;
return LPF_SUCCESS;
}
lpf_err_t lpf_get_sent_msg_count_per_slot( lpf_t lpf, size_t * sent_msgs, lpf_memslot_t slot) {
(void) lpf;
*sent_msgs = 0;
+ (void) slot;
return LPF_SUCCESS;
}
@@ -239,3 +271,79 @@ lpf_err_t lpf_abort( lpf_t lpf)
(void) lpf;
return LPF_SUCCESS;
}
+
+lpf_err_t lpf_noc_resize_memory_register( lpf_t ctx, size_t max_regs )
+{
+ (void) ctx;
+ (void) max_regs;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_register(
+ lpf_t ctx,
+ void * pointer,
+ size_t size,
+ lpf_memslot_t * memslot
+)
+{
+ (void) ctx;
+ (void) pointer;
+ (void) size;
+ (void) memslot;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_deregister(
+ lpf_t ctx,
+ lpf_memslot_t memslot
+)
+{
+ (void) ctx;
+ (void) memslot;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_put(
+ lpf_t ctx,
+ lpf_memslot_t src_slot,
+ size_t src_offset,
+ lpf_pid_t dst_pid,
+ lpf_memslot_t dst_slot,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+)
+{
+ (void) ctx;
+ (void) src_slot;
+ (void) src_offset;
+ (void) dst_pid;
+ (void) dst_slot;
+ (void) dst_offset;
+ (void) size;
+ (void) attr;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_noc_get(
+ lpf_t ctx,
+ lpf_pid_t src_pid,
+ lpf_memslot_t src_slot,
+ size_t src_offset,
+ lpf_memslot_t dst_slot,
+ size_t dst_offset,
+ size_t size,
+ lpf_msg_attr_t attr
+)
+{
+ (void) ctx;
+ (void) src_pid;
+ (void) src_slot;
+ (void) src_offset;
+ (void) dst_slot;
+ (void) dst_offset;
+ (void) size;
+ (void) attr;
+
+ return LPF_SUCCESS;
+}
diff --git a/src/pthreads/barrier.cpp b/src/pthreads/barrier.cpp
index cacfbbf8..92442474 100644
--- a/src/pthreads/barrier.cpp
+++ b/src/pthreads/barrier.cpp
@@ -82,9 +82,9 @@ namespace {
{
#ifdef VALGRIND_MEMCHECK
#ifdef LPF_ON_MACOS
- pthread_yield_np();
+ sched_yield_np();
#else
- pthread_yield(); // allow other processes to progress
+ sched_yield(); // allow other processes to progress
#endif
#endif
}
@@ -145,7 +145,7 @@ namespace {
if (m_available )
_mm_mwait(0, 0);
else
- pthread_yield();
+ sched_yield();
}
bool m_available;
@@ -160,9 +160,9 @@ namespace {
void pause()
{
#ifdef LPF_ON_MACOS
- pthread_yield_np();
+ sched_yield_np();
#else
- if (pthread_yield()) {
+ if (sched_yield()) {
LOG(2, "While waiting, the Posix thread library failed to "
"yield the CPU to the OS" );
}
diff --git a/src/pthreads/core.cpp b/src/pthreads/core.cpp
index 763d9a44..38799e9a 100644
--- a/src/pthreads/core.cpp
+++ b/src/pthreads/core.cpp
@@ -17,6 +17,7 @@
#include
#include
+#include
#include "threadlocaldata.hpp"
#include "machineparams.hpp"
@@ -37,6 +38,10 @@
#include // for pthreads
+// the value 2 in this implementation indicates support for lpf_abort in a way
+// that may deviate from the stdlib abort()
+const int LPF_HAS_ABORT = 2;
+
const lpf_err_t LPF_SUCCESS = 0;
const lpf_err_t LPF_ERR_OUT_OF_MEMORY = 1;
const lpf_err_t LPF_ERR_FATAL = 2;
@@ -386,6 +391,7 @@ lpf_err_t lpf_resize_memory_register( lpf_t ctx, size_t max_regs )
}
lpf_err_t lpf_get_rcvd_msg_count_per_slot(lpf_t ctx, size_t * msgs, lpf_memslot_t slot) {
+ (void) slot;
*msgs = 0;
lpf::ThreadLocalData * t = realCtx(ctx);
if (t->isAborted())
@@ -403,6 +409,15 @@ lpf_err_t lpf_get_rcvd_msg_count(lpf_t ctx, size_t * msgs) {
}
lpf_err_t lpf_get_sent_msg_count_per_slot(lpf_t ctx, size_t * msgs, lpf_memslot_t slot) {
+ *msgs = 0;
+ (void) slot;
+ lpf::ThreadLocalData * t = realCtx(ctx);
+ if (t->isAborted())
+ return LPF_SUCCESS;
+ return LPF_SUCCESS;
+}
+
+lpf_err_t lpf_get_sent_msg_count(lpf_t ctx, size_t * msgs) {
*msgs = 0;
lpf::ThreadLocalData * t = realCtx(ctx);
if (t->isAborted())
diff --git a/src/pthreads/threadlocaldata.cpp b/src/pthreads/threadlocaldata.cpp
index 6a62e4d3..1923b272 100644
--- a/src/pthreads/threadlocaldata.cpp
+++ b/src/pthreads/threadlocaldata.cpp
@@ -442,6 +442,10 @@ err_t ThreadLocalData :: sync( bool expectExit)
}
err_t ThreadLocalData :: countingSyncPerSlot(bool expectExit, lpf_memslot_t slot, size_t expected_sent, size_t expected_rcvd) {
+ (void) expectExit;
+ (void) slot;
+ (void) expected_sent;
+ (void) expected_rcvd;
return LPF_SUCCESS;
}
diff --git a/test_launcher.py.in b/test_launcher.py.in
new file mode 100644
index 00000000..656e570f
--- /dev/null
+++ b/test_launcher.py.in
@@ -0,0 +1,38 @@
+import argparse
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser( description='Death test launcher' )
+parser.add_argument("-e", "--engine", type=str)
+parser.add_argument("-L", "--parallel_launcher", type=str)
+parser.add_argument("-p", "--min_process_count", type=int)
+parser.add_argument("-P", "--max_process_count", type=int)
+parser.add_argument("-t", "--lpf_probe_timer", type=float)
+parser.add_argument("-R", "--expected_return_code", type=int)
+parser.add_argument( 'cmd', nargs=argparse.REMAINDER )
+args = parser.parse_args()
+
+# This is only for passing Gtest info to CMake
+# The parallel launcher is still needed as Open MPI
+# binaries terminate without the launcher on our cluster,
+# even for single process runs
+if args.cmd[-1] == '--gtest_list_tests':
+ run_cmd = [args.parallel_launcher, '-engine', args.engine, '-n', '1'] + args.cmd
+ cmd = subprocess.run( run_cmd)
+ sys.exit(cmd.returncode)
+# Actual use of our launcher
+else:
+ for i in range(args.min_process_count, args.max_process_count+1):
+ if args.lpf_probe_timer > 0.0:
+ run_cmd = [args.parallel_launcher, '-engine', args.engine, '-probe', str(args.lpf_probe_timer), '-n', str(i)] + args.cmd
+ else:
+ run_cmd = [args.parallel_launcher, '-engine', args.engine, '-n', str(i)] + args.cmd
+ print("Run command: ")
+ print(run_cmd)
+ cmd = subprocess.run( run_cmd)
+ print("Test returned code = " + str(cmd.returncode))
+ retcode = cmd.returncode
+ if (retcode != args.expected_return_code):
+ print("Test " + args.cmd[0] + args.cmd[1] + "\nreturned\t" + str(retcode) + "\nexpected return code was: " + str(args.expected_return_code))
+ sys.exit(1)
+ print("Test " + args.cmd[0] + args.cmd[1] + " passed")
diff --git a/tests/functional/CMakeLists.txt b/tests/functional/CMakeLists.txt
index 11e4baf1..04ebf85d 100644
--- a/tests/functional/CMakeLists.txt
+++ b/tests/functional/CMakeLists.txt
@@ -77,26 +77,20 @@ set(test_sources
func_lpf_exec_single_call_single_arg_single_proc.cpp
func_lpf_get_parallel_alltoall.cpp
func_lpf_get_parallel_huge.cpp
- func_lpf_get_parallel_single.cpp
- #func_lpf_hook_simple.mpirma.cpp
- #func_lpf_hook_simple.pthread.cpp
- #func_lpf_hook_subset.mpimsg.cpp
- #func_lpf_hook_tcp.mpirma.cpp
- #func_lpf_hook_tcp_timeout.mpirma.cpp
- #func_lpf_put_parallel_bad_pattern.cpp <= in exception_list
- func_lpf_put_and_get_overlapping.cpp
func_lpf_get_parallel_overlapping_complete.cpp
- func_lpf_put_parallel_overlapping_complete.cpp
func_lpf_get_parallel_overlapping_pyramid.cpp
- func_lpf_put_parallel_overlapping_pyramid.cpp
func_lpf_get_parallel_overlapping_rooftiling.cpp
- func_lpf_put_parallel_overlapping_rooftiling.cpp
+ func_lpf_get_parallel_single.cpp
func_lpf_probe_parallel_full.cpp
func_lpf_probe_parallel_nested.cpp
func_lpf_probe_root.cpp
+ func_lpf_put_and_get_overlapping.cpp
func_lpf_put_parallel_alltoall.cpp
func_lpf_put_parallel_big.cpp
func_lpf_put_parallel_huge.cpp
+ func_lpf_put_parallel_overlapping_complete.cpp
+ func_lpf_put_parallel_overlapping_pyramid.cpp
+ func_lpf_put_parallel_overlapping_rooftiling.cpp
func_lpf_put_parallel_single.cpp
func_lpf_register_and_deregister_irregularly.cpp
func_lpf_register_and_deregister_many_global.cpp
@@ -142,21 +136,39 @@ foreach (LPF_IMPL_ID ${ENGINES})
string(REGEX MATCH "overlapping|early|bsplib" foundTest ${testSource})
if (NOT ${LPF_IMPL_ID} STREQUAL "zero")
add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}")
-
- string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource})
- get_filename_component(baseName ${testSource} NAME_WE )
- set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
elseif ("${foundTest}" STREQUAL "")
add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}")
-
- string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource})
- get_filename_component(baseName ${testSource} NAME_WE )
- set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
endif()
endforeach(testSource)
+
endforeach(LPF_IMPL_ID)
+# Individual test for NOC (Non-coherence) protocol, only for zero engine
+# (part of HiCR project)
+set(LPF_IMPL_CONFIG ${LPFLIB_CONFIG_NAME})
+set(mode "")
+set(exeName "func_lpf_test_noc_ring_zero_${LPF_IMPL_CONFIG}${mode}")
+add_gtest(${exeName} "zero" OFF "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_test_noc_ring.cpp")
+
+# start of engine-specific tests
+foreach (LPF_IMPL_ID ${ENGINES})
+ if ("${LPF_IMPL_ID}" STREQUAL "pthread" OR "${LPF_IMPL_ID}" STREQUAL "mpirma")
+ foreach(testSource func_lpf_hook_simple.${LPF_IMPL_ID}.cpp)
+ string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource})
+ get_filename_component(baseName ${testSource} NAME_WE )
+ set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
+ add_gtest(${exeName} ${LPF_IMPL_ID} ON "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}")
+ endforeach(testSource)
+ endif()
+ if ("${LPF_IMPL_ID}" STREQUAL "mpimsg")
+ add_gtest(func_lpf_hook_subset.mpimsg mpimsg ON "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_hook_subset.mpimsg.cpp")
+ endif()
+ if ("${LPF_IMPL_ID}" STREQUAL "mpirma")
+ add_gtest(func_lpf_hook_tcp_timeout.mpirma mpirma ON "${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_hook_tcp_timeout.mpirma.cpp")
+ endif()
+endforeach(LPF_IMPL_ID)
+# end of engine-specific tests
include_directories(.)
add_subdirectory(debug)
diff --git a/tests/functional/collectives/CMakeLists.txt b/tests/functional/collectives/CMakeLists.txt
index 463b4de5..80aaa137 100644
--- a/tests/functional/collectives/CMakeLists.txt
+++ b/tests/functional/collectives/CMakeLists.txt
@@ -17,21 +17,21 @@
set(c99_tests_sources
-func_lpf_allcombine.cpp
-func_lpf_allgather.cpp
-func_lpf_allgather_overlapped.cpp
-func_lpf_allreduce.cpp
-func_lpf_alltoall.cpp
-func_lpf_broadcast.cpp
-func_lpf_broadcast_prime_size_object.cpp
-func_lpf_broadcast_small_prime_size_object.cpp
-func_lpf_collectives_init.cpp
-func_lpf_collectives_init_overflow.cpp
-func_lpf_combine.cpp
-func_lpf_gather.cpp
-func_lpf_reduce.cpp
-func_lpf_scatter.cpp
-func_lpf_zero_cost.cpp
+ func_lpf_allcombine.cpp
+ func_lpf_allgather.cpp
+ func_lpf_allgather_overlapped.cpp
+ func_lpf_allreduce.cpp
+ func_lpf_alltoall.cpp
+ func_lpf_broadcast.cpp
+ func_lpf_broadcast_prime_size_object.cpp
+ func_lpf_broadcast_small_prime_size_object.cpp
+ func_lpf_collectives_init.cpp
+ func_lpf_collectives_init_overflow.cpp
+ func_lpf_combine.cpp
+ func_lpf_gather.cpp
+ func_lpf_reduce.cpp
+ func_lpf_scatter.cpp
+ func_lpf_zero_cost.cpp
)
foreach (LPF_IMPL_ID ${ENGINES})
@@ -51,9 +51,5 @@ foreach (LPF_IMPL_ID ${ENGINES})
add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}")
- string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${testSource})
- get_filename_component(baseName ${testSource} NAME_WE )
- set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
-
endforeach(testSource)
endforeach(LPF_IMPL_ID)
diff --git a/tests/functional/debug/CMakeLists.txt b/tests/functional/debug/CMakeLists.txt
index 0292d488..67ffcb5d 100644
--- a/tests/functional/debug/CMakeLists.txt
+++ b/tests/functional/debug/CMakeLists.txt
@@ -37,10 +37,6 @@ set(debug_test_sources
func_lpf_debug_global_deregister_order_mismatch.cpp
func_lpf_debug_global_deregister_unequal.cpp
func_lpf_debug_global_register_null_memreg.cpp
- #func_lpf_debug_hook_null_f_symbols.pthread.cpp
- #func_lpf_debug_hook_null_input.pthread.cpp
- #func_lpf_debug_hook_null_output.pthread.cpp
- #func_lpf_debug_hook_null_spmd.pthread.cpp
func_lpf_debug_local_register_null_memreg.cpp
func_lpf_debug_put_after_deregister_dest_after_sync.cpp
func_lpf_debug_put_after_deregister_dest.cpp
@@ -90,10 +86,18 @@ foreach (LPF_IMPL_ID ${ENGINES})
add_gtest(${exeName} ${LPF_IMPL_ID} ${debug} "${CMAKE_CURRENT_SOURCE_DIR}/${testSource}" )
- string(REGEX REPLACE "(.${LPF_IMPL_ID})?.cpp$" "" baseName ${CMAKE_CURRENT_SOURCE_DIR}/${testSource})
- get_filename_component(baseName ${testSource} NAME_WE )
- set(exeName "${baseName}_${LPF_IMPL_ID}_${LPF_IMPL_CONFIG}${mode}")
-
endforeach(testSource)
endforeach(LPF_IMPL_ID)
+add_gtest(func_lpf_debug_hook_f_symbols_pthread "pthread" ON
+ ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_f_symbols.pthread.cpp)
+
+add_gtest(func_lpf_debug_hook_null_input_pthread "pthread" ON
+ ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_input.pthread.cpp)
+
+add_gtest(func_lpf_debug_hook_null_output_pthread "pthread" ON
+ ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_output.pthread.cpp)
+
+add_gtest(func_lpf_debug_hook_null_spmd_pthread "pthread" ON
+ ${CMAKE_CURRENT_SOURCE_DIR}/func_lpf_debug_hook_null_spmd.pthread.cpp)
+
diff --git a/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp b/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp
index 139bad91..5afa95a2 100644
--- a/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp
+++ b/tests/functional/debug/func_lpf_debug_deregister_non_existing_slot.cpp
@@ -21,47 +21,22 @@
void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
{
- (void) args;
- int x = 3; int y = 6;
- lpf_memslot_t xSlot = LPF_INVALID_MEMSLOT;
- lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT;
-
- lpf_err_t rc = lpf_resize_memory_register( lpf, 2 );
- EXPECT_EQ(LPF_SUCCESS, rc );
-
- rc = lpf_resize_message_queue( lpf, 2 );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_register_global( lpf, &x, sizeof(x), &xSlot );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_register_global( lpf, &y, sizeof(y), &ySlot );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_get( lpf, (pid+1)%nprocs, xSlot, 3, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- FAIL();
- // the write error will be detected at this sync
- //rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
+ (void) pid; (void) nprocs; (void) args;
+ lpf_memslot_t slot;
+ memset( &slot, 1, sizeof(slot)); // init to some weird data
+ lpf_deregister( lpf, slot );
}
/**
- * \test Testing for a lpf_get() that reads past globally registered memory bounds
- * \pre P >= 2
- * \return Message: source memory .* is read past the end by 3 bytes
+ * \test Deregister a non-registered slot
+ * \pre P >= 1
+ * \return Message: Invalid attempt to deregister a memory slot, because it has not been registered before
* \return Exit code: 6
*/
-TEST( API, func_lpf_debug_deregister_non_existing_slot )
+TEST(API, func_lpf_debug_deregister_non_existing_slot )
{
lpf_err_t rc = LPF_SUCCESS;
rc = lpf_exec( LPF_ROOT, LPF_MAX_P, &spmd, LPF_NO_ARGS );
- EXPECT_EQ( LPF_SUCCESS, rc );
+ EXPECT_EQ(LPF_SUCCESS, rc );
}
diff --git a/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp b/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp
index 90da31e8..895260c9 100644
--- a/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp
+++ b/tests/functional/debug/func_lpf_debug_get_too_many_requests.cpp
@@ -55,7 +55,6 @@ void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
EXPECT_EQ(3, y[0] );
EXPECT_EQ(4, y[1] );
- //FAIL();
}
/**
diff --git a/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp
index c898128d..51eb6a5e 100644
--- a/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp
+++ b/tests/functional/debug/func_lpf_debug_hook_null_f_symbols.pthread.cpp
@@ -54,17 +54,25 @@ void * pthread_spmd( void * _data ) {
&init
);
EXPECT_EQ( rc, LPF_SUCCESS );
- FAIL();
+
+ rc = lpf_hook( init, &lpf_spmd, args );
+ EXPECT_EQ( rc, LPF_SUCCESS );
+
+ rc = lpf_pthread_finalize( init );
+ EXPECT_EQ( rc, LPF_SUCCESS );
return NULL;
}
+// the below tests for return code 134 as this is what aborted programs return
+// as an error code on modern systems
+
/**
* \test Tests lpf_hook on pthread implementation with NULL f_symbols
* \pre P <= 1
* \pre P >= 1
* \return Message: NULL f_symbols argument while f_size is non-zero
- * \return Exit code: 6
+ * \return Exit code: 134
*/
TEST( API, func_lpf_hook_null_f_symbols )
{
diff --git a/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp
index 04c7c355..051f8542 100644
--- a/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp
+++ b/tests/functional/debug/func_lpf_debug_hook_null_input.pthread.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -32,7 +32,7 @@ void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
{ (void) ctx; (void) pid; (void) nprocs; (void) args; }
void * pthread_spmd( void * _data ) {
- EXPECT_NE( "%p", _data, NULL );
+ EXPECT_NE( _data, (void*)NULL );
const struct thread_local_data data = * ((struct thread_local_data*) _data);
const int pts_rc = pthread_setspecific( pid_key, _data );
@@ -46,61 +46,62 @@ void * pthread_spmd( void * _data ) {
lpf_init_t init;
lpf_err_t rc = LPF_SUCCESS;
- EXPECT_EQ( "%d", pts_rc, 0 );
+ EXPECT_EQ( pts_rc, 0 );
rc = lpf_pthread_initialize(
(lpf_pid_t)data.s,
(lpf_pid_t)data.P,
&init
);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_hook( init, &lpf_spmd, args );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_pthread_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
return NULL;
}
+// the below tests for return code 134 as this is what aborted programs return
+// as an error code on modern systems
+
/**
* \test Tests lpf_hook on pthread implementation with NULL input
* \pre P <= 1
* \pre P >= 1
* \return Message: NULL input argument while input_size is non-zero
- * \return Exit code: 6
+ * \return Exit code: 134
*/
-TEST( func_lpf_hook_null_input )
+TEST( API, func_lpf_hook_null_input )
{
long k = 0;
const long P = sysconf( _SC_NPROCESSORS_ONLN );
const int ptc_rc = pthread_key_create( &pid_key, NULL );
- EXPECT_EQ( "%d", ptc_rc, 0 );
+ EXPECT_EQ( ptc_rc, 0 );
pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) );
- EXPECT_NE( "%p", threads, NULL );
+ EXPECT_NE( threads, (pthread_t*)NULL );
struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) );
- EXPECT_NE( "%p", data, NULL );
+ EXPECT_NE( data, (struct thread_local_data*)NULL );
for( k = 0; k < P; ++k ) {
data[ k ].P = P;
data[ k ].s = k;
const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
for( k = 0; k < P; ++k ) {
const int rval = pthread_join( threads[ k ], NULL );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
const int ptd_rc = pthread_key_delete( pid_key );
- EXPECT_EQ( "%d", ptd_rc, 0 );
-
- return 0;
+ EXPECT_EQ( ptd_rc, 0 );
}
diff --git a/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp
index 02268258..eec3be9a 100644
--- a/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp
+++ b/tests/functional/debug/func_lpf_debug_hook_null_output.pthread.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -32,7 +32,7 @@ void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
{ (void) ctx; (void) pid; (void) nprocs; (void) args; }
void * pthread_spmd( void * _data ) {
- EXPECT_NE( "%p", _data, NULL );
+ EXPECT_NE( _data, (void*)NULL );
const struct thread_local_data data = * ((struct thread_local_data*) _data);
const int pts_rc = pthread_setspecific( pid_key, _data );
@@ -46,61 +46,62 @@ void * pthread_spmd( void * _data ) {
lpf_init_t init;
lpf_err_t rc = LPF_SUCCESS;
- EXPECT_EQ( "%d", pts_rc, 0 );
+ EXPECT_EQ( pts_rc, 0 );
rc = lpf_pthread_initialize(
(lpf_pid_t)data.s,
(lpf_pid_t)data.P,
&init
);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_hook( init, &lpf_spmd, args );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_pthread_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
return NULL;
}
+// the below tests for return code 134 as this is what aborted programs return
+// as an error code on modern systems
+
/**
* \test Tests lpf_hook on pthread implementation with NULL output
* \pre P <= 1
* \pre P >= 1
* \return Message: NULL output argument while output_size is non-zero
- * \return Exit code: 6
+ * \return Exit code: 134
*/
-TEST( func_lpf_hook_null_output )
+TEST( API, func_lpf_hook_null_output )
{
long k = 0;
const long P = sysconf( _SC_NPROCESSORS_ONLN );
const int ptc_rc = pthread_key_create( &pid_key, NULL );
- EXPECT_EQ( "%d", ptc_rc, 0 );
+ EXPECT_EQ( ptc_rc, 0 );
pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) );
- EXPECT_NE( "%p", threads, NULL );
+ EXPECT_NE( threads, (pthread_t *)NULL );
struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) );
- EXPECT_NE( "%p", data, NULL );
+ EXPECT_NE( data, (struct thread_local_data*)NULL );
for( k = 0; k < P; ++k ) {
data[ k ].P = P;
data[ k ].s = k;
const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
for( k = 0; k < P; ++k ) {
const int rval = pthread_join( threads[ k ], NULL );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
const int ptd_rc = pthread_key_delete( pid_key );
- EXPECT_EQ( "%d", ptd_rc, 0 );
-
- return 0;
+ EXPECT_EQ( ptd_rc, 0 );
}
diff --git a/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp b/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp
index 20209c16..00bcc0c7 100644
--- a/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp
+++ b/tests/functional/debug/func_lpf_debug_hook_null_spmd.pthread.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -30,7 +30,7 @@ struct thread_local_data {
void * pthread_spmd( void * _data ) {
- EXPECT_NE( "%p", _data, NULL );
+ EXPECT_NE( _data, (void*)NULL );
const struct thread_local_data data = * ((struct thread_local_data*) _data);
const int pts_rc = pthread_setspecific( pid_key, _data );
@@ -44,61 +44,62 @@ void * pthread_spmd( void * _data ) {
lpf_init_t init;
lpf_err_t rc = LPF_SUCCESS;
- EXPECT_EQ( "%d", pts_rc, 0 );
+ EXPECT_EQ( pts_rc, 0 );
rc = lpf_pthread_initialize(
(lpf_pid_t)data.s,
(lpf_pid_t)data.P,
&init
);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_hook( init, NULL, args );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_pthread_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
return NULL;
}
+// the below tests for return code 134 as this is what aborted programs return
+// as an error code on modern systems
+
/**
* \test Tests lpf_hook on pthread implementation with NULL spmd
* \pre P <= 1
* \pre P >= 1
* \return Message: NULL spmd argument
- * \return Exit code: 6
+ * \return Exit code: 134
*/
-TEST( func_lpf_hook_null_spmd )
+TEST( API, func_lpf_hook_null_spmd )
{
long k = 0;
const long P = sysconf( _SC_NPROCESSORS_ONLN );
const int ptc_rc = pthread_key_create( &pid_key, NULL );
- EXPECT_EQ( "%d", ptc_rc, 0 );
+ EXPECT_EQ( ptc_rc, 0 );
pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) );
- EXPECT_NE( "%p", threads, NULL );
+ EXPECT_NE( threads, (pthread_t*)NULL );
struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) );
- EXPECT_NE( "%p", data, NULL );
+ EXPECT_NE( data, (struct thread_local_data *)NULL );
for( k = 0; k < P; ++k ) {
data[ k ].P = P;
data[ k ].s = k;
const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
for( k = 0; k < P; ++k ) {
const int rval = pthread_join( threads[ k ], NULL );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
const int ptd_rc = pthread_key_delete( pid_key );
- EXPECT_EQ( "%d", ptd_rc, 0 );
-
- return 0;
+ EXPECT_EQ( ptd_rc, 0 );
}
diff --git a/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp b/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp
index cb4da30e..d31498b3 100644
--- a/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp
+++ b/tests/functional/debug/func_lpf_debug_put_read_write_conflict_among_many.cpp
@@ -75,3 +75,5 @@ TEST( API, func_lpf_debug_put_read_write_conflict_among_many )
rc = lpf_exec( LPF_ROOT, LPF_MAX_P, &spmd, LPF_NO_ARGS );
EXPECT_EQ( LPF_SUCCESS, rc );
}
+
+
diff --git a/tests/functional/exception_list b/tests/functional/exception_list
deleted file mode 100644
index c7590fc1..00000000
--- a/tests/functional/exception_list
+++ /dev/null
@@ -1,5 +0,0 @@
-func_lpf_put_parallel_bad_pattern_.*
-func_lpf_hook_tcp_mpi..._[^_]*_mvapich2
-func_lpf_hook_tcp_mpi..._[^_]*_openmpi_gcc_64_1_10_7
-func_lpf_hook_tcp_timeout_mpi..._[^_]*_openmpi_gcc_64_1_10_7
-func_lpf_hook_tcp_mpi..._[^_]*_mpich_ge_gcc_64_3_2rc2
diff --git a/tests/functional/func_lpf_hook_simple.mpirma.cpp b/tests/functional/func_lpf_hook_simple.mpirma.cpp
index 5dfa7104..81016a39 100644
--- a/tests/functional/func_lpf_hook_simple.mpirma.cpp
+++ b/tests/functional/func_lpf_hook_simple.mpirma.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -28,11 +28,11 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
lpf_err_t rc = LPF_SUCCESS;
rc = lpf_resize_message_queue( ctx, 2);
- EXPECT_EQ( "%d", LPF_SUCCESS, rc );
+ EXPECT_EQ( LPF_SUCCESS, rc );
rc = lpf_resize_memory_register( ctx, 2);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync(ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
int x = 5 - pid;
int y = pid;
@@ -41,21 +41,21 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT;
rc = lpf_register_global( ctx, &x, sizeof(x), &xSlot );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_register_global( ctx, &y, sizeof(y), &ySlot );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync( ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_put( ctx, xSlot, 0, (pid + 1) % nprocs, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync( ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
- EXPECT_EQ( "%d", x, (int) (5 - pid) );
- EXPECT_EQ( "%d", y, (int) (5 - (pid + nprocs -1) % nprocs) );
+ EXPECT_EQ( x, (int) (5 - pid) );
+ EXPECT_EQ( y, (int) (5 - (pid + nprocs -1) % nprocs) );
}
// disable automatic initialization.
@@ -66,7 +66,7 @@ const int LPF_MPI_AUTO_INITIALIZE=0;
* \pre P >= 1
* \return Exit code: 0
*/
-TEST( func_lpf_hook_simple_mpi )
+TEST(API, func_lpf_hook_simple_mpi)
{
lpf_err_t rc = LPF_SUCCESS;
MPI_Init(NULL, NULL);
@@ -79,16 +79,15 @@ TEST( func_lpf_hook_simple_mpi )
lpf_init_t init;
rc = lpf_mpi_initialize_with_mpicomm( MPI_COMM_WORLD, &init);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_hook( init, &spmd, LPF_NO_ARGS );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_mpi_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
MPI_Finalize();
- return 0;
}
diff --git a/tests/functional/func_lpf_hook_simple.pthread.cpp b/tests/functional/func_lpf_hook_simple.pthread.cpp
index 3b33bdc6..6438b676 100644
--- a/tests/functional/func_lpf_hook_simple.pthread.cpp
+++ b/tests/functional/func_lpf_hook_simple.pthread.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -36,18 +36,18 @@ struct thread_local_data {
void lpf_spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
{
(void) ctx;
- const struct thread_local_data * const data = pthread_getspecific( pid_key );
-
- EXPECT_EQ( "%zd", (size_t)nprocs, (size_t)(data->P) );
- EXPECT_EQ( "%zd", (size_t)pid, (size_t)(data->s) );
- EXPECT_EQ( "%zd", (size_t)(args.input_size), (size_t)(sizeof( struct thread_local_data)) );
- EXPECT_EQ( "%zd", (size_t)(args.output_size), (size_t)0 );
- EXPECT_EQ( "%p", args.input, data );
- EXPECT_EQ( "%p", args.output, NULL );
+ const struct thread_local_data * const data = static_cast(pthread_getspecific( pid_key ));
+
+ EXPECT_EQ( (size_t)nprocs, (size_t)(data->P) );
+ EXPECT_EQ( (size_t)pid, (size_t)(data->s) );
+ EXPECT_EQ( (size_t)(args.input_size), (size_t)(sizeof( struct thread_local_data)) );
+ EXPECT_EQ( (size_t)(args.output_size), (size_t)0 );
+ EXPECT_EQ( args.input, data );
+ EXPECT_EQ( args.output, nullptr );
}
void * pthread_spmd( void * _data ) {
- EXPECT_NE( "%p", _data, NULL );
+ EXPECT_NE( _data, nullptr);
const struct thread_local_data data = * ((struct thread_local_data*) _data);
const int pts_rc = pthread_setspecific( pid_key, _data );
@@ -61,20 +61,20 @@ void * pthread_spmd( void * _data ) {
lpf_init_t init;
lpf_err_t rc = LPF_SUCCESS;
- EXPECT_EQ( "%d", pts_rc, 0 );
+ EXPECT_EQ( pts_rc, 0 );
rc = lpf_pthread_initialize(
(lpf_pid_t)data.s,
(lpf_pid_t)data.P,
&init
);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_hook( init, &lpf_spmd, args );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_pthread_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
return NULL;
}
@@ -85,36 +85,35 @@ void * pthread_spmd( void * _data ) {
* \pre P >= 1
* \return Exit code: 0
*/
-TEST( func_lpf_hook_simple_pthread )
+TEST(API, func_lpf_hook_simple_pthread )
{
long k = 0;
const long P = sysconf( _SC_NPROCESSORS_ONLN );
const int ptc_rc = pthread_key_create( &pid_key, NULL );
- EXPECT_EQ( "%d", ptc_rc, 0 );
+ EXPECT_EQ( ptc_rc, 0 );
pthread_t * const threads = (pthread_t*) malloc( P * sizeof(pthread_t) );
- EXPECT_NE( "%p", threads, NULL );
+ EXPECT_NE( threads, nullptr );
struct thread_local_data * const data = (struct thread_local_data*) malloc( P * sizeof(struct thread_local_data) );
- EXPECT_NE( "%p", data, NULL );
+ EXPECT_NE( data, nullptr );
for( k = 0; k < P; ++k ) {
data[ k ].P = P;
data[ k ].s = k;
const int rval = pthread_create( threads + k, NULL, &pthread_spmd, data + k );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
for( k = 0; k < P; ++k ) {
const int rval = pthread_join( threads[ k ], NULL );
- EXPECT_EQ( "%d", rval, 0 );
+ EXPECT_EQ( rval, 0 );
}
const int ptd_rc = pthread_key_delete( pid_key );
- EXPECT_EQ( "%d", ptd_rc, 0 );
+ EXPECT_EQ( ptd_rc, 0 );
- return 0;
}
diff --git a/tests/functional/func_lpf_hook_subset.mpimsg.cpp b/tests/functional/func_lpf_hook_subset.mpimsg.cpp
index f073e443..6693bab3 100644
--- a/tests/functional/func_lpf_hook_subset.mpimsg.cpp
+++ b/tests/functional/func_lpf_hook_subset.mpimsg.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
@@ -39,10 +39,10 @@ void subset_func(MPI_Comm comm)
lpf_init_t init;
lpf_err_t rc = lpf_mpi_initialize_with_mpicomm(comm, &init);
- EXPECT_EQ( "%d", LPF_SUCCESS, rc );
+ EXPECT_EQ( LPF_SUCCESS, rc );
rc = lpf_hook(init, test_spmd, LPF_NO_ARGS);
- EXPECT_EQ( "%d", LPF_SUCCESS, rc );
+ EXPECT_EQ( LPF_SUCCESS, rc );
}
/**
@@ -50,7 +50,7 @@ void subset_func(MPI_Comm comm)
* \pre P >= 3
* \return Exit code: 0
*/
-TEST( func_lpf_hook_subset )
+TEST(API, func_lpf_hook_subset )
{
MPI_Init(NULL, NULL);
@@ -71,5 +71,5 @@ TEST( func_lpf_hook_subset )
MPI_Barrier(MPI_COMM_WORLD); // Paranoid barrier
MPI_Finalize();
- return 0;
+
}
diff --git a/tests/functional/func_lpf_hook_tcp.mpirma.cpp b/tests/functional/func_lpf_hook_tcp.mpirma.cpp
index 2921e6fc..0d7f0290 100644
--- a/tests/functional/func_lpf_hook_tcp.mpirma.cpp
+++ b/tests/functional/func_lpf_hook_tcp.mpirma.cpp
@@ -17,28 +17,35 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
+static int myargc;
+static char **myargv;
+
+// disable automatic initialization.
+const int LPF_MPI_AUTO_INITIALIZE=0;
+
+
void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
{
lpf_err_t rc = LPF_SUCCESS;
struct { int pid, nprocs; } params;
- EXPECT_EQ( "%lu", sizeof(params), args.input_size );
+ EXPECT_EQ( sizeof(params), args.input_size );
memcpy( ¶ms, args.input, sizeof(params));
- EXPECT_EQ( "%u", (lpf_pid_t) params.pid, pid );
- EXPECT_EQ( "%u", (lpf_pid_t) params.nprocs, nprocs );
+ EXPECT_EQ( (lpf_pid_t) params.pid, pid );
+ EXPECT_EQ( (lpf_pid_t) params.nprocs, nprocs );
rc = lpf_resize_message_queue( ctx, 2);
- EXPECT_EQ( "%d", LPF_SUCCESS, rc );
+ EXPECT_EQ( LPF_SUCCESS, rc );
rc = lpf_resize_memory_register( ctx, 2);
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync(ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
int x = 5 - pid;
int y = pid;
@@ -47,25 +54,23 @@ void spmd( lpf_t ctx, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args )
lpf_memslot_t ySlot = LPF_INVALID_MEMSLOT;
rc = lpf_register_global( ctx, &x, sizeof(x), &xSlot );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_register_global( ctx, &y, sizeof(y), &ySlot );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync( ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_put( ctx, xSlot, 0, (pid + 1) % nprocs, ySlot, 0, sizeof(x), LPF_MSG_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_sync( ctx, LPF_SYNC_DEFAULT );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
- EXPECT_EQ( "%d", x, (int) (5 - pid) );
- EXPECT_EQ( "%d", y, (int) (5 - (pid + nprocs -1) % nprocs) );
+ EXPECT_EQ( x, (int) (5 - pid) );
+ EXPECT_EQ( y, (int) (5 - (pid + nprocs -1) % nprocs) );
}
-// disable automatic initialization.
-const int LPF_MPI_AUTO_INITIALIZE=0;
/**
* \test Tests lpf_hook on mpi implementation using TCP/IP to initialize. The pids and nprocs are checked for their correctness.
@@ -73,15 +78,14 @@ const int LPF_MPI_AUTO_INITIALIZE=0;
* \return Exit code: 0
* \note Independent processes: yes
*/
-TEST( func_lpf_hook_tcp )
+TEST( API, func_lpf_hook_tcp_mpirma )
{
lpf_err_t rc = LPF_SUCCESS;
- MPI_Init(&argc, &argv);
struct { int pid, nprocs; } params = { 0, 0};
- EXPECT_GT("%d", argc, 2 );
- params.pid = atoi( argv[1] );
- params.nprocs = atoi( argv[2] );
+ EXPECT_GT( myargc, 2 );
+ params.pid = atoi( myargv[1] );
+ params.nprocs = atoi( myargv[2] );
lpf_init_t init;
rc = lpf_mpi_initialize_over_tcp(
@@ -89,7 +93,7 @@ TEST( func_lpf_hook_tcp )
params.pid, params.nprocs, &init); // let e.g. Intel MPI try a few
// alternative fabrics
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
lpf_args_t args;
args.input = ¶ms;
@@ -100,13 +104,20 @@ TEST( func_lpf_hook_tcp )
args.f_size = 0;
rc = lpf_hook( init, &spmd, args );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
rc = lpf_mpi_finalize( init );
- EXPECT_EQ( "%d", rc, LPF_SUCCESS );
+ EXPECT_EQ( rc, LPF_SUCCESS );
MPI_Finalize();
- return 0;
+}
+
+int main(int argc, char **argv) {
+ myargc = argc;
+ myargv = argv;
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+
}
diff --git a/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp b/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp
index e8aba501..94d3edd6 100644
--- a/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp
+++ b/tests/functional/func_lpf_hook_tcp_timeout.mpirma.cpp
@@ -17,7 +17,7 @@
#include
#include
-#include "Test.h"
+#include "gtest/gtest.h"
#include
#include
@@ -31,7 +31,7 @@ const int LPF_MPI_AUTO_INITIALIZE=0;
* \pre P <= 100
* \return Exit code: 1
*/
-TEST( func_lpf_hook_tcp_timeout_mpi )
+TEST(API, func_lpf_hook_tcp_timeout_mpi )
{
MPI_Init(NULL, NULL);
@@ -45,9 +45,8 @@ TEST( func_lpf_hook_tcp_timeout_mpi )
"localhost", "9325", 999,
pid, nprocs, &init);
- EXPECT_EQ( "%d", rc, LPF_ERR_FATAL );
+ EXPECT_EQ( rc, LPF_ERR_FATAL );
- return 0;
}
diff --git a/tests/functional/func_lpf_put_parallel_bad_pattern.cpp b/tests/functional/func_lpf_put_parallel_bad_pattern.cpp
deleted file mode 100644
index fe1d8f48..00000000
--- a/tests/functional/func_lpf_put_parallel_bad_pattern.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-
-/*
- * Copyright 2021 Huawei Technologies Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-#include
-
-#include "gtest/gtest.h"
-
-void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args)
-{
- (void) args; // ignore args parameter
-
- lpf_err_t rc = LPF_SUCCESS;
- const unsigned n = sqrt(nprocs);
- unsigned i;
- unsigned * xs, *ys;
- ys = (unsigned *) malloc( sizeof(ys[0]) * n);
- xs = (unsigned *) malloc( sizeof(xs[0]) * n);
- for (i = 0; i < n; ++i)
- {
- xs[i] = i;
- ys[i] = 0;
- }
-
- rc = lpf_resize_message_queue( lpf, n);
- EXPECT_EQ( LPF_SUCCESS, rc );
- rc = lpf_resize_memory_register( lpf, 2 );
- EXPECT_EQ( LPF_SUCCESS, rc );
- rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- lpf_memslot_t xslot = LPF_INVALID_MEMSLOT;
- lpf_memslot_t yslot = LPF_INVALID_MEMSLOT;
- rc = lpf_register_local( lpf, xs, sizeof(xs[0]) * n, &xslot );
- EXPECT_EQ( LPF_SUCCESS, rc );
- rc = lpf_register_global( lpf, ys, sizeof(ys[0]) * n, &yslot );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- rc = lpf_sync( lpf, LPF_SYNC_DEFAULT);
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- // Check that data is OK.
- for (i = 0; i < n; ++i)
- {
- EXPECT_EQ( i, xs[i] );
- EXPECT_EQ( 0u, ys[i] );
- }
-
- if ( pid < n )
- {
- for ( i = 0; i < n; ++ i)
- {
- EXPECT_LT( i*n, nprocs);
- rc = lpf_put( lpf, xslot, sizeof(xs[0])*i,
- i*n, yslot, sizeof(ys[0])*pid, sizeof(xs[0]),
- LPF_MSG_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
- }
- }
-
-
- rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
- EXPECT_EQ( LPF_SUCCESS, rc );
-
- for (i = 0; i < n; ++i)
- {
- EXPECT_EQ( i, xs[i] );
- if ( pid % n == 0 && pid < n*n)
- EXPECT_EQ( pid / n, ys[i] );
- else
- EXPECT_EQ( 0, ys[i] );
- }
-
-}
-
-/**
- * \test Test lpf_put by doing a pattern which bad for a sparse all-to-all
- * \pre P >= 5
- * \pre P <= 5
- * \return Exit code: 0
- */
-TEST( API, func_lpf_put_parallel_bad_pattern )
-{
- lpf_err_t rc = lpf_exec( LPF_ROOT, LPF_MAX_P, spmd, LPF_NO_ARGS);
- EXPECT_EQ( LPF_SUCCESS, rc );
-}
diff --git a/tests/functional/func_lpf_test_noc_ring.cpp b/tests/functional/func_lpf_test_noc_ring.cpp
new file mode 100644
index 00000000..1050b68e
--- /dev/null
+++ b/tests/functional/func_lpf_test_noc_ring.cpp
@@ -0,0 +1,85 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mpi.h"
+#include
+#include
+#include "gtest/gtest.h"
+
+void spmd( lpf_t lpf, lpf_pid_t pid, lpf_pid_t nprocs, lpf_args_t args)
+{
+ (void) args; // ignore args parameter
+
+ lpf_err_t rc = LPF_SUCCESS;
+
+ char buf1[30] = {'\0'};
+ char buf2[30] = {'\0'};
+
+ strcpy(buf1, "HELLO");
+
+ rc = lpf_resize_memory_register(lpf, 2); // identical to lpf_noc_resize at the moment
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ rc = lpf_resize_message_queue( lpf, 2);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+
+ rc = lpf_sync( lpf, LPF_SYNC_DEFAULT );
+ EXPECT_EQ( LPF_SUCCESS, rc );
+
+ lpf_memslot_t xslot = LPF_INVALID_MEMSLOT;
+ lpf_memslot_t yslot = LPF_INVALID_MEMSLOT;
+ rc = lpf_register_local( lpf, buf1, sizeof(buf1), &xslot );
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ rc = lpf_noc_register( lpf, buf2, sizeof(buf2), &yslot );
+ EXPECT_EQ( LPF_SUCCESS, rc );
+
+
+ int left = (nprocs + pid - 1) % nprocs;
+ int right = ( pid + 1) % nprocs;
+
+ char * buffer;
+ size_t bufferSize;
+ lpf_noc_serialize_slot(lpf, yslot, &buffer, &bufferSize);
+ char rmtBuff[bufferSize];
+
+ MPI_Sendrecv(buffer, bufferSize, MPI_BYTE, left, 0, rmtBuff, bufferSize, MPI_BYTE, right, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+
+ rc = lpf_noc_deserialize_slot(lpf, rmtBuff, yslot);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ rc = lpf_noc_put(lpf, xslot, 0, right, yslot, 0, sizeof(buf1), LPF_MSG_DEFAULT);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ rc = lpf_sync(lpf, LPF_SYNC_DEFAULT);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ EXPECT_EQ(std::string(buf2), std::string(buf1));
+ rc = lpf_deregister(lpf, xslot);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+ rc = lpf_noc_deregister(lpf, yslot);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+
+}
+
+/**
+ * \test Testing NOC functionality
+ * \pre P >= 2
+ * \pre P <= 2
+ * \return Exit code: 0
+ */
+TEST( API, func_lpfAPI_test_noc_ring )
+{
+ lpf_err_t rc = lpf_exec( LPF_ROOT, LPF_MAX_P, spmd, LPF_NO_ARGS);
+ EXPECT_EQ( LPF_SUCCESS, rc );
+}