From 80abeeb85f066986daec426c51bea989d790b09e Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 28 Mar 2024 12:08:18 -0700 Subject: [PATCH 01/21] Initial conversion to C++ --- .gitignore | 1 + CMakeLists.txt | 2 +- src/CMakeLists.txt | 30 +++++++++---------- src/{fenix.c => fenix.cpp} | 2 +- ...{fenix_callbacks.c => fenix_callbacks.cpp} | 4 +-- ...{fenix_comm_list.c => fenix_comm_list.cpp} | 4 +-- ...enix_data_group.c => fenix_data_group.cpp} | 0 ...ix_data_member.c => fenix_data_member.cpp} | 0 ...ix_data_policy.c => fenix_data_policy.cpp} | 0 ...c => fenix_data_policy_in_memory_raid.cpp} | 2 +- ...ata_recovery.c => fenix_data_recovery.cpp} | 0 ...ix_data_subset.c => fenix_data_subset.cpp} | 0 ..._mpi_override.c => fenix_mpi_override.cpp} | 0 src/{fenix_opt.c => fenix_opt.cpp} | 0 ..._recovery.c => fenix_process_recovery.cpp} | 4 +-- src/{fenix_util.c => fenix_util.cpp} | 0 src/{globals.c => globals.cpp} | 0 test/failed_spares/CMakeLists.txt | 2 +- test/issend/CMakeLists.txt | 2 +- test/message_replay/CMakeLists.txt | 14 +++++++++ test/no_jump/CMakeLists.txt | 2 +- test/request_cancelled/CMakeLists.txt | 2 +- test/request_tracking/CMakeLists.txt | 2 +- 23 files changed, 44 insertions(+), 29 deletions(-) rename src/{fenix.c => fenix.cpp} (99%) rename src/{fenix_callbacks.c => fenix_callbacks.cpp} (95%) rename src/{fenix_comm_list.c => fenix_comm_list.cpp} (98%) rename src/{fenix_data_group.c => fenix_data_group.cpp} (100%) rename src/{fenix_data_member.c => fenix_data_member.cpp} (100%) rename src/{fenix_data_policy.c => fenix_data_policy.cpp} (100%) rename src/{fenix_data_policy_in_memory_raid.c => fenix_data_policy_in_memory_raid.cpp} (99%) rename src/{fenix_data_recovery.c => fenix_data_recovery.cpp} (100%) rename src/{fenix_data_subset.c => fenix_data_subset.cpp} (100%) rename src/{fenix_mpi_override.c => fenix_mpi_override.cpp} (100%) rename src/{fenix_opt.c => fenix_opt.cpp} (100%) rename src/{fenix_process_recovery.c => fenix_process_recovery.cpp} 
(99%) rename src/{fenix_util.c => fenix_util.cpp} (100%) rename src/{globals.c => globals.cpp} (100%) create mode 100644 test/message_replay/CMakeLists.txt diff --git a/.gitignore b/.gitignore index 3e3dd51..04d97a9 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ test/request_tracking/fenix_request_tracking_test test/request_tracking/fenix_request_tracking_test_nofenix build/ install/ +spack-* # Other *~ diff --git a/CMakeLists.txt b/CMakeLists.txt index ecaac8b..b8d6c7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 3.10.2) -project(Fenix C) +project(Fenix C CXX) # The version number. set(FENIX_VERSION_MAJOR 1) set(FENIX_VERSION_MINOR 0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7c823fd..a33ea2c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,25 +15,25 @@ configure_file (${CMAKE_SOURCE_DIR}/include/fenix-config.h.in FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h) set (Fenix_SOURCES -fenix.c -fenix_mpi_override.c -fenix_opt.c -fenix_process_recovery.c -fenix_util.c -fenix_data_recovery.c -fenix_data_group.c -fenix_data_policy.c -fenix_data_policy_in_memory_raid.c -fenix_data_member.c -fenix_data_subset.c -fenix_comm_list.c -fenix_callbacks.c -globals.c +fenix.cpp +fenix_mpi_override.cpp +fenix_opt.cpp +fenix_process_recovery.cpp +fenix_util.cpp +fenix_data_recovery.cpp +fenix_data_group.cpp +fenix_data_policy.cpp +fenix_data_policy_in_memory_raid.cpp +fenix_data_member.cpp +fenix_data_subset.cpp +fenix_comm_list.cpp +fenix_callbacks.cpp +globals.cpp ) add_library( fenix STATIC ${Fenix_SOURCES}) -target_link_libraries(fenix PUBLIC MPI::MPI_C) +target_link_libraries(fenix PUBLIC MPI::MPI_CXX) target_include_directories(fenix PUBLIC diff --git a/src/fenix.c b/src/fenix.cpp similarity index 99% rename from src/fenix.c rename to src/fenix.cpp index 056814a..5c03796 100644 --- a/src/fenix.c +++ b/src/fenix.cpp @@ -91,7 +91,7 @@ int Fenix_Data_member_create( int 
group_id, int member_id, void *buffer, int cou } int Fenix_Data_group_get_redundancy_policy( int group_id, int* policy_name, void *policy_value, int *flag ) { - return __fenix_group_get_redundancy_policy( group_id, policy_name, policy_value, flag ); + return __fenix_group_get_redundancy_policy( group_id, policy_name, (int*)policy_value, flag ); } int Fenix_Data_wait(Fenix_Request request) { diff --git a/src/fenix_callbacks.c b/src/fenix_callbacks.cpp similarity index 95% rename from src/fenix_callbacks.c rename to src/fenix_callbacks.cpp index 8779402..1e0c913 100644 --- a/src/fenix_callbacks.c +++ b/src/fenix_callbacks.cpp @@ -70,7 +70,7 @@ int __fenix_callback_register(void (*recover)(MPI_Comm, int, void *), void *call { int error_code = FENIX_SUCCESS; if (fenix.fenix_init_flag) { - fenix_callback_func *fp = s_malloc(sizeof(fenix_callback_func)); + fenix_callback_func *fp = (fenix_callback_func *) s_malloc(sizeof(fenix_callback_func)); fp->x = recover; fp->y = callback_data; __fenix_callback_push( &fenix.callback_list, fp); @@ -105,7 +105,7 @@ void __fenix_callback_invoke_all(int error) void __fenix_callback_push(fenix_callback_list_t **head, fenix_callback_func *fp) { - fenix_callback_list_t *callback = malloc(sizeof(fenix_callback_list_t)); + fenix_callback_list_t *callback = (fenix_callback_list_t *) malloc(sizeof(fenix_callback_list_t)); callback->callback = fp; callback->next = *head; *head = callback; diff --git a/src/fenix_comm_list.c b/src/fenix_comm_list.cpp similarity index 98% rename from src/fenix_comm_list.c rename to src/fenix_comm_list.cpp index d1b56d2..1a805fe 100644 --- a/src/fenix_comm_list.c +++ b/src/fenix_comm_list.cpp @@ -123,11 +123,11 @@ void __fenix_comm_list_destroy(void) { else { fenix_comm_list_elm_t *current = my_list.tail; while (current->next) { - fenix_comm_list_elm_t *new = current->next; + fenix_comm_list_elm_t *next = current->next; MPIX_Comm_revoke(*current->comm); PMPI_Comm_free(current->comm); free(current); - current = new; 
+ current = next; } MPIX_Comm_revoke(*current->comm); PMPI_Comm_free(current->comm); diff --git a/src/fenix_data_group.c b/src/fenix_data_group.cpp similarity index 100% rename from src/fenix_data_group.c rename to src/fenix_data_group.cpp diff --git a/src/fenix_data_member.c b/src/fenix_data_member.cpp similarity index 100% rename from src/fenix_data_member.c rename to src/fenix_data_member.cpp diff --git a/src/fenix_data_policy.c b/src/fenix_data_policy.cpp similarity index 100% rename from src/fenix_data_policy.c rename to src/fenix_data_policy.cpp diff --git a/src/fenix_data_policy_in_memory_raid.c b/src/fenix_data_policy_in_memory_raid.cpp similarity index 99% rename from src/fenix_data_policy_in_memory_raid.c rename to src/fenix_data_policy_in_memory_raid.cpp index 897f163..5b17aca 100644 --- a/src/fenix_data_policy_in_memory_raid.c +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -846,7 +846,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, if(recovery_locally_possible) retval = FENIX_SUCCESS; } else if (group->raid_mode == 5){ - int* set_results = malloc(sizeof(int) * group->set_size); + int* set_results = (int *) malloc(sizeof(int) * group->set_size); MPI_Allgather((void*)&found_member, 1, MPI_INT, (void*)set_results, 1, MPI_INT, group->set_comm); diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.cpp similarity index 100% rename from src/fenix_data_recovery.c rename to src/fenix_data_recovery.cpp diff --git a/src/fenix_data_subset.c b/src/fenix_data_subset.cpp similarity index 100% rename from src/fenix_data_subset.c rename to src/fenix_data_subset.cpp diff --git a/src/fenix_mpi_override.c b/src/fenix_mpi_override.cpp similarity index 100% rename from src/fenix_mpi_override.c rename to src/fenix_mpi_override.cpp diff --git a/src/fenix_opt.c b/src/fenix_opt.cpp similarity index 100% rename from src/fenix_opt.c rename to src/fenix_opt.cpp diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.cpp similarity index 
99% rename from src/fenix_process_recovery.c rename to src/fenix_process_recovery.cpp index 6a65820..42a7949 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.cpp @@ -83,7 +83,7 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); - fenix.world = malloc(sizeof(MPI_Comm)); + fenix.world = (MPI_Comm *)malloc(sizeof(MPI_Comm)); MPI_Comm_dup(comm, fenix.world); PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); @@ -659,7 +659,7 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa qsort(survivor_world, survivor_world_size, sizeof(int), __fenix_comparator); int failed_pos = 0; - int *fail_ranks = calloc(fail_world_size, sizeof(int)); + int *fail_ranks = (int *)calloc(fail_world_size, sizeof(int)); int i; for (i = 0; i < survivor_world_size + fail_world_size; i++) { diff --git a/src/fenix_util.c b/src/fenix_util.cpp similarity index 100% rename from src/fenix_util.c rename to src/fenix_util.cpp diff --git a/src/globals.c b/src/globals.cpp similarity index 100% rename from src/globals.c rename to src/globals.cpp diff --git a/test/failed_spares/CMakeLists.txt b/test/failed_spares/CMakeLists.txt index 8fd95b3..46e4f4c 100644 --- a/test/failed_spares/CMakeLists.txt +++ b/test/failed_spares/CMakeLists.txt @@ -9,7 +9,7 @@ # add_executable(fenix_failed_spares fenix_failed_spares.c) -target_link_libraries(fenix_failed_spares fenix MPI::MPI_C) +target_link_libraries(fenix_failed_spares fenix MPI::MPI_CXX) add_test(NAME failed_spares COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_failed_spares ${MPIEXEC_POSTFLAGS} 3 1 3 4 ) diff --git a/test/issend/CMakeLists.txt b/test/issend/CMakeLists.txt index f141d40..2b44c38 100644 --- a/test/issend/CMakeLists.txt +++ b/test/issend/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_issend_test fenix_issend_test.c) 
-target_link_libraries(fenix_issend_test fenix MPI::MPI_C) +target_link_libraries(fenix_issend_test fenix MPI::MPI_CXX) add_test(NAME issend COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_issend_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/message_replay/CMakeLists.txt b/test/message_replay/CMakeLists.txt new file mode 100644 index 0000000..f9f2c11 --- /dev/null +++ b/test/message_replay/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +add_executable(fenix_message_logging_test fenix_message_logging_test.cxx) +target_link_libraries(fenix_message_logging_test fenix MPI::MPI_CXX) + +add_test(NAME request_cancelled COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_message_logging_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/no_jump/CMakeLists.txt b/test/no_jump/CMakeLists.txt index dfc9311..250059f 100644 --- a/test/no_jump/CMakeLists.txt +++ b/test/no_jump/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_no_jump_test fenix_no_jump_test.c) -target_link_libraries(fenix_no_jump_test fenix MPI::MPI_C) +target_link_libraries(fenix_no_jump_test fenix MPI::MPI_CXX) add_test(NAME no_jump COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_no_jump_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/request_cancelled/CMakeLists.txt b/test/request_cancelled/CMakeLists.txt index 97dd331..f20dee8 100644 --- a/test/request_cancelled/CMakeLists.txt +++ b/test/request_cancelled/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_request_cancelled_test fenix_req_cancelled_test.c) 
-target_link_libraries(fenix_request_cancelled_test fenix MPI::MPI_C) +target_link_libraries(fenix_request_cancelled_test fenix MPI::MPI_CXX) add_test(NAME request_cancelled COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_request_cancelled_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/request_tracking/CMakeLists.txt b/test/request_tracking/CMakeLists.txt index 8d008ed..694bc99 100644 --- a/test/request_tracking/CMakeLists.txt +++ b/test/request_tracking/CMakeLists.txt @@ -9,7 +9,7 @@ # add_executable(fenix_request_tracking_test fenix_request_tracking_test.c) -target_link_libraries(fenix_request_tracking_test fenix MPI::MPI_C) +target_link_libraries(fenix_request_tracking_test fenix MPI::MPI_CXX) add_test(NAME request_tracking COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_request_tracking_test ${MPIEXEC_POSTFLAGS}) From c105f57be32dd919f09cef05e2cba248f1f1186c Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 4 Mar 2025 11:39:37 -0600 Subject: [PATCH 02/21] Reorganize to allow c++ in internal headers --- include/fenix.h | 4 +- ...{fenix_comm_list.h => fenix_comm_list.hpp} | 0 ...enix_data_group.h => fenix_data_group.hpp} | 11 +- ...ix_data_member.h => fenix_data_member.hpp} | 4 +- ...ix_data_packet.h => fenix_data_packet.hpp} | 0 ...ix_data_policy.h => fenix_data_policy.hpp} | 2 +- ...h => fenix_data_policy_in_memory_raid.hpp} | 2 +- ...ata_recovery.h => fenix_data_recovery.hpp} | 6 +- include/{fenix_ext.h => fenix_ext.hpp} | 7 +- include/fenix_f.h | 82 ----------- ...process_recovery_global.h => fenix_init.h} | 29 ++-- include/{fenix_opt.h => fenix_opt.hpp} | 0 ..._recovery.h => fenix_process_recovery.hpp} | 6 +- include/{fenix_util.h => fenix_util.hpp} | 2 +- src/CMakeLists.txt | 4 +- src/fenix.cpp | 8 +- src/fenix_callbacks.cpp | 13 +- src/fenix_comm_list.cpp | 138 ------------------ src/fenix_data_group.cpp | 8 +- src/fenix_data_member.cpp | 8 +- src/fenix_data_policy.cpp | 9 +- 
src/fenix_data_policy_in_memory_raid.cpp | 12 +- src/fenix_data_recovery.cpp | 11 +- src/fenix_data_subset.cpp | 5 +- src/fenix_mpi_override.cpp | 76 ---------- src/fenix_opt.cpp | 6 +- src/fenix_process_recovery.cpp | 18 +-- src/fenix_util.cpp | 6 +- src/globals.cpp | 2 +- test/failed_spares/fenix_failed_spares.c | 1 + test/issend/fenix_issend_test.c | 1 + test/no_jump/fenix_no_jump_test.c | 1 + .../fenix_req_cancelled_test.c | 1 + .../fenix_request_tracking_test.c | 1 + .../fenix_subset_internal_test.c | 1 - 35 files changed, 93 insertions(+), 392 deletions(-) rename include/{fenix_comm_list.h => fenix_comm_list.hpp} (100%) rename include/{fenix_data_group.h => fenix_data_group.hpp} (97%) rename include/{fenix_data_member.h => fenix_data_member.hpp} (98%) rename include/{fenix_data_packet.h => fenix_data_packet.hpp} (100%) rename include/{fenix_data_policy.h => fenix_data_policy.hpp} (98%) rename include/{fenix_data_policy_in_memory_raid.h => fenix_data_policy_in_memory_raid.hpp} (98%) rename include/{fenix_data_recovery.h => fenix_data_recovery.hpp} (98%) rename include/{fenix_ext.h => fenix_ext.hpp} (96%) delete mode 100644 include/fenix_f.h rename include/{fenix_process_recovery_global.h => fenix_init.h} (84%) rename include/{fenix_opt.h => fenix_opt.hpp} (100%) rename include/{fenix_process_recovery.h => fenix_process_recovery.hpp} (96%) rename include/{fenix_util.h => fenix_util.hpp} (99%) delete mode 100644 src/fenix_comm_list.cpp delete mode 100644 src/fenix_mpi_override.cpp diff --git a/include/fenix.h b/include/fenix.h index 7a68f82..28af25b 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -63,8 +63,10 @@ #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif + + #include "fenix_data_subset.h" -#include "fenix_process_recovery.h" +#include "fenix_init.h" /** * @file diff --git a/include/fenix_comm_list.h b/include/fenix_comm_list.hpp similarity index 100% rename from include/fenix_comm_list.h rename to include/fenix_comm_list.hpp diff 
--git a/include/fenix_data_group.h b/include/fenix_data_group.hpp similarity index 97% rename from include/fenix_data_group.h rename to include/fenix_data_group.hpp index 67cb079..35c76da 100644 --- a/include/fenix_data_group.h +++ b/include/fenix_data_group.hpp @@ -56,11 +56,13 @@ #ifndef __FENIX_DATA_GROUP_H__ #define __FENIX_DATA_GROUP_H__ +#include + #include #include "fenix.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" -#include "fenix_util.h" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" +#include "fenix_util.hpp" #include "fenix_data_subset.h" #define __FENIX_DEFAULT_GROUP_SIZE 32 @@ -68,6 +70,7 @@ typedef struct __fenix_group_vtbl fenix_group_vtbl_t; typedef struct __fenix_group fenix_group_t; + //This defines the functions which must be implemented by the group typedef struct __fenix_group_vtbl { int (*group_delete)(fenix_group_t* group); @@ -165,7 +168,7 @@ void __fenix_data_recovery_reinit( fenix_data_recovery_t *dr, fenix_two_containe void __fenix_ensure_data_recovery_capacity( fenix_data_recovery_t *dr); -int __fenix_search_groupid( int key, fenix_data_recovery_t *dr ); +int __fenix_search_groupid( int key, fenix_data_recovery_t *dr); int __fenix_find_next_group_position( fenix_data_recovery_t *dr ); diff --git a/include/fenix_data_member.h b/include/fenix_data_member.hpp similarity index 98% rename from include/fenix_data_member.h rename to include/fenix_data_member.hpp index 391142b..e41b3bd 100644 --- a/include/fenix_data_member.h +++ b/include/fenix_data_member.hpp @@ -57,8 +57,8 @@ #define __FENIX_DATA_MEMBER_H__ #include -#include "fenix_data_packet.h" -#include "fenix_util.h" +#include "fenix_data_packet.hpp" +#include "fenix_util.hpp" #define __FENIX_DEFAULT_MEMBER_SIZE 512 diff --git a/include/fenix_data_packet.h b/include/fenix_data_packet.hpp similarity index 100% rename from include/fenix_data_packet.h rename to include/fenix_data_packet.hpp diff --git a/include/fenix_data_policy.h 
b/include/fenix_data_policy.hpp similarity index 98% rename from include/fenix_data_policy.h rename to include/fenix_data_policy.hpp index b932985..58bc1a2 100644 --- a/include/fenix_data_policy.h +++ b/include/fenix_data_policy.hpp @@ -59,7 +59,7 @@ #include #include "fenix.h" -#include "fenix_data_group.h" +#include "fenix_data_group.hpp" int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag); diff --git a/include/fenix_data_policy_in_memory_raid.h b/include/fenix_data_policy_in_memory_raid.hpp similarity index 98% rename from include/fenix_data_policy_in_memory_raid.h rename to include/fenix_data_policy_in_memory_raid.hpp index 931b0c7..e420c79 100644 --- a/include/fenix_data_policy_in_memory_raid.h +++ b/include/fenix_data_policy_in_memory_raid.hpp @@ -58,7 +58,7 @@ #define __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ #include -#include "fenix_data_group.h" +#include "fenix_data_group.hpp" void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, void* policy_value, int* flag); diff --git a/include/fenix_data_recovery.h b/include/fenix_data_recovery.hpp similarity index 98% rename from include/fenix_data_recovery.h rename to include/fenix_data_recovery.hpp index 4580cb9..717bb13 100644 --- a/include/fenix_data_recovery.h +++ b/include/fenix_data_recovery.hpp @@ -58,10 +58,10 @@ #define __FENIX_DATA_RECOVERY__ -#include "fenix_data_group.h" -#include "fenix_data_member.h" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" #include "fenix_data_subset.h" -#include "fenix_util.h" +#include "fenix_util.hpp" #include #include #include diff --git a/include/fenix_ext.h b/include/fenix_ext.hpp similarity index 96% rename from include/fenix_ext.h rename to include/fenix_ext.hpp index ef4dcc4..9930798 100644 --- a/include/fenix_ext.h +++ b/include/fenix_ext.hpp @@ -59,9 +59,9 @@ #include #include "fenix.h" -#include 
"fenix_opt.h" -#include "fenix_data_group.h" -#include "fenix_process_recovery.h" +#include "fenix_opt.hpp" +#include "fenix_data_group.hpp" +#include "fenix_process_recovery.hpp" typedef struct { int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init @@ -87,7 +87,6 @@ typedef struct { int *ret_error; fenix_callback_list_t* callback_list; // singly linked list for user-defined Fenix callback functions - //fenix_communicator_list_t* communicator_list; // singly linked list for Fenix resilient communicators fenix_debug_opt_t options; // This is reserved to store the user options MPI_Comm *world; // Duplicate of the MPI communicator provided by user diff --git a/include/fenix_f.h b/include/fenix_f.h deleted file mode 100644 index a8f06c0..0000000 --- a/include/fenix_f.h +++ /dev/null @@ -1,82 +0,0 @@ -!/* -!//@HEADER -!// ************************************************************************ -!// -!// -!// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -!// _| _| _|_| _| _| _| _| -!// _|_|_| _|_|_| _| _| _| _| _| -!// _| _| _| _|_| _| _| _| -!// _| _|_|_|_| _| _| _|_|_| _| _| -!// -!// -!// -!// -!// Copyright (C) 2016 Rutgers University and Sandia Corporation -!// -!// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!// the U.S. Government retains certain rights in this software. -!// -!// Redistribution and use in source and binary forms, with or without -!// modification, are permitted provided that the following conditions are -!// met: -!// -!// 1. Redistributions of source code must retain the above copyright -!// notice, this list of conditions and the following disclaimer. -!// -!// 2. Redistributions in binary form must reproduce the above copyright -!// notice, this list of conditions and the following disclaimer in the -!// documentation and/or other materials provided with the distribution. -!// -!// 3. 
Neither the name of the Corporation nor the names of the -!// contributors may be used to endorse or promote products derived from -!// this software without specific prior written permission. -!// -!// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -!// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!// -!// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -!// Michael Heroux, and Matthew Whitlock -!// -!// Questions? Contact Keita Teranishi (knteran@sandia.gov) and -!// Marc Gamell (mgamell@cac.rutgers.edu) -!// -!// ************************************************************************ -!//@HEADER -!*/ - -#ifndef __FENIX_F_H__ -#define __FENIX_F_H__ - - -!/* ------------------------------------------------------------------------- */ -!/* */ -!/* */ -!/* D E F I N E S */ -!/* */ -!/* */ -!/* ------------------------------------------------------------------------- */ - -#if 1 -#define Fenix_Init(_process_role, _comm, _new_comm, _argc, _argv, _num_of_spare_ranks, _spawn, _info, _error) \ - jb \ - call Fenix_Preinit(_process_role, _comm, _new_comm, _argc, _argv, _spare_ranks, _spawn, _info, _error); \ - if(setjmp(jb) .ne. 
0) then; \ - _process_status = 2; \ - endif; \ - call Fenix_Postinit( error ); - -#endif - -#endif - - diff --git a/include/fenix_process_recovery_global.h b/include/fenix_init.h similarity index 84% rename from include/fenix_process_recovery_global.h rename to include/fenix_init.h index 4116369..c4ca69b 100644 --- a/include/fenix_process_recovery_global.h +++ b/include/fenix_init.h @@ -44,7 +44,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, // Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and @@ -53,23 +53,24 @@ // ************************************************************************ //@HEADER */ -#ifndef __FENIX_PROCES_RECOVERY_GLOBAL_H__ -#define __FENIX_PROCES_RECOVERY_GLOBAL_H__ + +#ifndef __FENIX_INIT__ +#define __FENIX_INIT__ #include #include -#include -#include -#include -#include -#include -#include -#include "fenix_opt.h" -#include "fenix_util.h" -#include "fenix_data_group.h" +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +int __fenix_preinit(int *, MPI_Comm, MPI_Comm *, int *, char ***, int, int, MPI_Info, int *, jmp_buf *); + +void __fenix_postinit(int *); -/* This header file is intended to provide global variable definitions for fenix_process_recovery.c only */ +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif -#endif // __FENIX_PROCES_RECOVERY_GLOBAL_H__ +#endif diff --git a/include/fenix_opt.h b/include/fenix_opt.hpp similarity index 100% rename from include/fenix_opt.h rename to include/fenix_opt.hpp diff --git a/include/fenix_process_recovery.h b/include/fenix_process_recovery.hpp similarity index 96% rename from include/fenix_process_recovery.h rename to include/fenix_process_recovery.hpp 
index 9b85e04..1132992 100644 --- a/include/fenix_process_recovery.h +++ b/include/fenix_process_recovery.hpp @@ -66,6 +66,8 @@ #include #include +#include "fenix_init.h" + #define __FENIX_RESUME_AT_INIT 0 #define __FENIX_RESUME_NO_JUMP 200 @@ -92,8 +94,6 @@ typedef struct { fenix_comm_list_elm_t *tail; } fenix_comm_list_t; -int __fenix_preinit(int *, MPI_Comm, MPI_Comm *, int *, char ***, int, int, MPI_Info, int *, jmp_buf *); - int __fenix_create_new_world(); int __fenix_repair_ranks(); @@ -116,8 +116,6 @@ int __fenix_get_rank_role(); void __fenix_set_rank_role(int FenixRankRole); -void __fenix_postinit(int *); - int __fenix_detect_failures(int do_recovery); void __fenix_finalize(); diff --git a/include/fenix_util.h b/include/fenix_util.hpp similarity index 99% rename from include/fenix_util.h rename to include/fenix_util.hpp index 8f76275..a42d4c3 100644 --- a/include/fenix_util.h +++ b/include/fenix_util.hpp @@ -57,7 +57,7 @@ #ifndef __FENIX_UTIL__ #define __FENIX_UTIL__ -#include "fenix_process_recovery.h" +#include "fenix_process_recovery.hpp" #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a33ea2c..4764c7b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,11 +12,10 @@ configure_file (${CMAKE_SOURCE_DIR}/include/fenix-config.h.in "${CMAKE_CURRENT_BINARY_DIR}/fenix-config.h" @ONLY) #include_directories(${CMAKE_CURRENT_BINARY_DIR}) -FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h) +FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*) set (Fenix_SOURCES fenix.cpp -fenix_mpi_override.cpp fenix_opt.cpp fenix_process_recovery.cpp fenix_util.cpp @@ -26,7 +25,6 @@ fenix_data_policy.cpp fenix_data_policy_in_memory_raid.cpp fenix_data_member.cpp fenix_data_subset.cpp -fenix_comm_list.cpp fenix_callbacks.cpp globals.cpp ) diff --git a/src/fenix.cpp b/src/fenix.cpp index 5c03796..525f57d 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -54,10 +54,10 @@ //@HEADER */ -#include "fenix_data_recovery.h" 
-#include "fenix_process_recovery.h" -#include "fenix_util.h" -#include "fenix_ext.h" +#include "fenix_data_recovery.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" #include "fenix.h" const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_FULL}; diff --git a/src/fenix_callbacks.cpp b/src/fenix_callbacks.cpp index 1e0c913..400535e 100644 --- a/src/fenix_callbacks.cpp +++ b/src/fenix_callbacks.cpp @@ -56,13 +56,12 @@ #include -#include "fenix_comm_list.h" -#include "fenix_ext.h" -#include "fenix_process_recovery.h" -#include "fenix_data_group.h" -#include "fenix_data_recovery.h" -#include "fenix_opt.h" -#include "fenix_util.h" +#include "fenix_ext.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" #include diff --git a/src/fenix_comm_list.cpp b/src/fenix_comm_list.cpp deleted file mode 100644 index 1a805fe..0000000 --- a/src/fenix_comm_list.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock -// -// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -fenix_comm_list_t my_list = {NULL, NULL}; - -int __fenix_comm_push(MPI_Comm *comm) { - fenix_comm_list_elm_t *current = (fenix_comm_list_elm_t *) malloc(sizeof(fenix_comm_list_elm_t)); - if (!current) return 0; - current->next = NULL; - current->comm = comm; - if (!my_list.tail) { - /* if list was empty, initialize head and tail */ - current->prev = NULL; - my_list.head = my_list.tail = current; - } - else { - /* if list was not empty, add element to the head of the list */ - current->prev = my_list.head; - my_list.head->next = current; - my_list.head = current; - } - return FENIX_SUCCESS; -} - -int __fenix_comm_delete(MPI_Comm *comm) { - - fenix_comm_list_elm_t *current = my_list.tail; - while (current) { - if (*(current->comm) == *comm) { - if (current != my_list.head && current != my_list.tail) { - current->prev->next = current->next; - current->next->prev = current->prev; - } - else if (current == my_list.tail) { - if (current->next) { - current->next->prev = NULL; - my_list.tail = current->next; - } - else my_list.tail = my_list.head = NULL; - } - else { - if (current->prev) { - current->prev->next = NULL; - my_list.head = current->prev; - } - else my_list.tail = my_list.head = NULL; - } - MPIX_Comm_revoke(*comm); - PMPI_Comm_free(comm); - free(current); - return 1; - } - else current = current->next; - } - /* if we end up here, the requested communicator has not been found */ - return 0; -} - - -void __fenix_comm_list_destroy(void) { - if (my_list.tail == NULL) { - return; - } - else { - fenix_comm_list_elm_t *current = my_list.tail; - while (current->next) { - fenix_comm_list_elm_t *next = current->next; - MPIX_Comm_revoke(*current->comm); - PMPI_Comm_free(current->comm); - free(current); - current = next; - } - 
MPIX_Comm_revoke(*current->comm); - PMPI_Comm_free(current->comm); - free(current); - } - my_list.tail = my_list.head = NULL; -} - diff --git a/src/fenix_data_group.cpp b/src/fenix_data_group.cpp index ad453aa..1d9e5ff 100644 --- a/src/fenix_data_group.cpp +++ b/src/fenix_data_group.cpp @@ -56,10 +56,10 @@ #include "mpi.h" #include "fenix-config.h" -#include "fenix_ext.h" -#include "fenix_data_group.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" +#include "fenix_ext.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" diff --git a/src/fenix_data_member.cpp b/src/fenix_data_member.cpp index 3d9d60d..7971d85 100644 --- a/src/fenix_data_member.cpp +++ b/src/fenix_data_member.cpp @@ -56,10 +56,10 @@ #include "mpi.h" #include "fenix-config.h" -#include "fenix_ext.h" -#include "fenix_data_recovery.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" +#include "fenix_ext.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" /** diff --git a/src/fenix_data_policy.cpp b/src/fenix_data_policy.cpp index 603aff1..903bf54 100644 --- a/src/fenix_data_policy.cpp +++ b/src/fenix_data_policy.cpp @@ -55,11 +55,10 @@ */ #include -#include "fenix_data_policy_in_memory_raid.h" -#include "fenix_data_policy.h" -#include "fenix_data_group.h" -#include "fenix_opt.h" -#include "fenix_ext.h" +#include "fenix_data_policy_in_memory_raid.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_data_group.hpp" +#include "fenix_opt.hpp" #include "fenix.h" /** diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 5b17aca..70e0d1b 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -56,13 +56,13 @@ #include #include "fenix.h" -#include "fenix_ext.h" -#include "fenix_opt.h" +#include "fenix_ext.hpp" +#include "fenix_opt.hpp" #include "fenix_data_subset.h" 
-#include "fenix_data_recovery.h" -#include "fenix_data_policy.h" -#include "fenix_data_group.h" -#include "fenix_data_member.h" +#include "fenix_data_recovery.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" #define __FENIX_IMR_DEFAULT_MENTRY_NUM 10 #define __FENIX_IMR_NO_MEMBERS 16000 diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 0fba896..914d1b0 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -56,12 +56,11 @@ -#include "fenix_data_recovery.h" -#include "fenix_data_policy.h" -#include "fenix_opt.h" -//#include "fenix_process_recovery.h" -#include "fenix_util.h" -#include "fenix_ext.h" +#include "fenix_data_recovery.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" #include diff --git a/src/fenix_data_subset.cpp b/src/fenix_data_subset.cpp index ffbb864..430fc1e 100644 --- a/src/fenix_data_subset.cpp +++ b/src/fenix_data_subset.cpp @@ -55,8 +55,9 @@ */ #include "mpi.h" -#include "fenix-config.h" -#include "fenix_ext.h" +#include "fenix.h" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" #include "fenix_data_subset.h" diff --git a/src/fenix_mpi_override.cpp b/src/fenix_mpi_override.cpp deleted file mode 100644 index 3761348..0000000 --- a/src/fenix_mpi_override.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock -// -// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ -#include "fenix_process_recovery.h" -#include "fenix_comm_list.h" -#include -#include -#include "fenix_ext.h" - -static inline -int __fenix_notify_newcomm(int ret, MPI_Comm *newcomm) -{ - if (ret != MPI_SUCCESS || - !fenix.fenix_init_flag || - *newcomm == MPI_COMM_NULL) return ret; - - if (__fenix_comm_push(newcomm) != FENIX_SUCCESS) { - fprintf(stderr, "[fenix error] Did not manage to push communicator\n"); - PMPI_Comm_free(newcomm); - ret = MPI_ERR_INTERN; - } - - return ret; -} diff --git a/src/fenix_opt.cpp b/src/fenix_opt.cpp index f6f6fac..5a44298 100644 --- a/src/fenix_opt.cpp +++ b/src/fenix_opt.cpp @@ -56,9 +56,9 @@ #include #include -#include "fenix_opt.h" -#include "fenix_util.h" -#include "fenix_ext.h" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" #define DEBUG 1 diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 42a7949..18d7fea 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -56,14 +56,12 @@ #include -#include "fenix_ext.h" -#include "fenix_comm_list.h" -#include "fenix_process_recovery_global.h" -#include "fenix_process_recovery.h" -#include "fenix_data_group.h" -#include "fenix_data_recovery.h" -#include "fenix_opt.h" -#include "fenix_util.h" +#include "fenix_ext.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" #include #include @@ -856,13 +854,9 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) 
if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); - __fenix_comm_list_destroy(); - fenix.repair_result = __fenix_repair_ranks(); break; case MPI_ERR_REVOKED: - __fenix_comm_list_destroy(); - fenix.repair_result = __fenix_repair_ranks(); break; case MPI_ERR_INTERN: diff --git a/src/fenix_util.cpp b/src/fenix_util.cpp index b56d237..246ea3f 100644 --- a/src/fenix_util.cpp +++ b/src/fenix_util.cpp @@ -54,9 +54,9 @@ //@HEADER */ -#include "fenix_opt.h" -#include "fenix_process_recovery.h" -#include "fenix_util.h" +#include "fenix_opt.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_util.hpp" char* logname; diff --git a/src/globals.cpp b/src/globals.cpp index e812a08..8285983 100644 --- a/src/globals.cpp +++ b/src/globals.cpp @@ -54,7 +54,7 @@ //@HEADER */ -#include "fenix_ext.h" +#include "fenix_ext.hpp" fenix_t fenix = { .fenix_init_flag = 0 diff --git a/test/failed_spares/fenix_failed_spares.c b/test/failed_spares/fenix_failed_spares.c index bea1dd7..6b202aa 100644 --- a/test/failed_spares/fenix_failed_spares.c +++ b/test/failed_spares/fenix_failed_spares.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; diff --git a/test/issend/fenix_issend_test.c b/test/issend/fenix_issend_test.c index 0159297..212a7ae 100644 --- a/test/issend/fenix_issend_test.c +++ b/test/issend/fenix_issend_test.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; diff --git a/test/no_jump/fenix_no_jump_test.c b/test/no_jump/fenix_no_jump_test.c index 82187a3..31eb9f3 100644 --- a/test/no_jump/fenix_no_jump_test.c +++ b/test/no_jump/fenix_no_jump_test.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; diff --git a/test/request_cancelled/fenix_req_cancelled_test.c b/test/request_cancelled/fenix_req_cancelled_test.c index 3d7c89a..554f7f4 100644 --- a/test/request_cancelled/fenix_req_cancelled_test.c +++ b/test/request_cancelled/fenix_req_cancelled_test.c @@ -60,6 +60,7 @@ #include 
#include #include +#include const int kKillID = 1; diff --git a/test/request_tracking/fenix_request_tracking_test.c b/test/request_tracking/fenix_request_tracking_test.c index f279e01..ae0dc11 100644 --- a/test/request_tracking/fenix_request_tracking_test.c +++ b/test/request_tracking/fenix_request_tracking_test.c @@ -4,6 +4,7 @@ #include #include #include //for memcpy +#include #ifndef RTT_NO_FENIX #include diff --git a/test/subset_internal/fenix_subset_internal_test.c b/test/subset_internal/fenix_subset_internal_test.c index 5dab905..3e797ed 100644 --- a/test/subset_internal/fenix_subset_internal_test.c +++ b/test/subset_internal/fenix_subset_internal_test.c @@ -56,7 +56,6 @@ */ #include -#include // Never called explicitly by the users #include #include #include From f22f3e0e260ea471e9cf6dd38dde4dc00d4cdcda Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Fri, 25 Apr 2025 10:08:15 -0500 Subject: [PATCH 03/21] Support c++ lambda callbacks --- include/fenix.h | 2 + include/fenix.hpp | 74 ++++++++++++++++++++++++++++++ include/fenix_ext.hpp | 5 +- include/fenix_process_recovery.hpp | 19 ++------ src/CMakeLists.txt | 2 + src/fenix.cpp | 10 +++- src/fenix_callbacks.cpp | 64 ++++---------------------- src/fenix_process_recovery.cpp | 12 ++--- 8 files changed, 106 insertions(+), 82 deletions(-) create mode 100644 include/fenix.hpp diff --git a/include/fenix.h b/include/fenix.h index 28af25b..c812ca4 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -61,6 +61,8 @@ #include #if defined(c_plusplus) || defined(__cplusplus) +#include "fenix.hpp" + extern "C" { #endif diff --git a/include/fenix.hpp b/include/fenix.hpp new file mode 100644 index 0000000..ff69498 --- /dev/null +++ b/include/fenix.hpp @@ -0,0 +1,74 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| 
_|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef __FENIX_HPP__ +#define __FENIX_HPP__ + +#include +#include +#include "fenix.h" + +/** + * @brief As the C-style callback, but accepts an std::function and does not use the void* pointer. + * + * @param[in] callback The function to register. + * + * @returnstatus + */ +int Fenix_Callback_register(std::function callback); + +#endif diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 9930798..0026325 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -58,6 +58,7 @@ #define __FENIX_EXT_H__ #include +#include #include "fenix.h" #include "fenix_opt.hpp" #include "fenix_data_group.hpp" @@ -77,7 +78,7 @@ typedef struct { //enum FenixRankRole role; // Role of rank: initial, survivor or repair int role; // Role of rank: initial, survivor or repair - int fenix_init_flag; + int fenix_init_flag = 0; int fail_world_size; int* fail_world; @@ -86,7 +87,7 @@ typedef struct { int *ret_role; int *ret_error; - fenix_callback_list_t* callback_list; // singly linked list for user-defined Fenix callback functions + std::vector callbacks; fenix_debug_opt_t options; // This is reserved to store the user options MPI_Comm *world; // Duplicate of the MPI communicator provided by user diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index 1132992..f6ad346 100644 --- a/include/fenix_process_recovery.hpp +++ b/include/fenix_process_recovery.hpp @@ -67,21 +67,12 @@ #include #include "fenix_init.h" +#include #define __FENIX_RESUME_AT_INIT 0 #define __FENIX_RESUME_NO_JUMP 200 -typedef void (*recover)( MPI_Comm, int, void *); - -typedef struct fcouple { - recover x; - void *y; -} fenix_callback_func; - -typedef struct __fenix_callback_list { - fenix_callback_func *callback; - struct __fenix_callback_list *next; -} fenix_callback_list_t; 
+using fenix_callback_func = std::function; typedef struct __fenix_comm_list_elm { struct __fenix_comm_list_elm *next; @@ -98,16 +89,12 @@ int __fenix_create_new_world(); int __fenix_repair_ranks(); -int __fenix_callback_register(void (*recover)(MPI_Comm, int, void *), void *); +int __fenix_callback_register(fenix_callback_func& recover); int __fenix_callback_pop(); -void __fenix_callback_push(fenix_callback_list_t **, fenix_callback_func *); - void __fenix_callback_invoke_all(int error); -int __fenix_callback_destroy(fenix_callback_list_t *callback_list); - int* __fenix_get_fail_ranks(int *, int, int); int __fenix_spare_rank(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4764c7b..0256344 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -31,6 +31,8 @@ globals.cpp add_library( fenix STATIC ${Fenix_SOURCES}) +target_compile_features(fenix PRIVATE cxx_std_17) + target_link_libraries(fenix PUBLIC MPI::MPI_CXX) target_include_directories(fenix diff --git a/src/fenix.cpp b/src/fenix.cpp index 525f57d..a383ad6 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -58,13 +58,19 @@ #include "fenix_process_recovery.hpp" #include "fenix_util.hpp" #include "fenix_ext.hpp" -#include "fenix.h" +#include "fenix.hpp" const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_FULL}; const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_EMPTY}; +int Fenix_Callback_register(std::function callback){ + return __fenix_callback_register(callback); +} + int Fenix_Callback_register(void (*recover)(MPI_Comm, int, void *), void *callback_data) { - return __fenix_callback_register(recover, callback_data); + return Fenix_Callback_register([recover, callback_data](MPI_Comm comm, int fenix_error){ + recover(comm, fenix_error, callback_data); + }); } int Fenix_Callback_pop() { diff --git a/src/fenix_callbacks.cpp b/src/fenix_callbacks.cpp index 400535e..5f981ba 100644 --- a/src/fenix_callbacks.cpp +++ 
b/src/fenix_callbacks.cpp @@ -65,71 +65,27 @@ #include -int __fenix_callback_register(void (*recover)(MPI_Comm, int, void *), void *callback_data) +int __fenix_callback_register(fenix_callback_func& recover) { - int error_code = FENIX_SUCCESS; - if (fenix.fenix_init_flag) { - fenix_callback_func *fp = (fenix_callback_func *) s_malloc(sizeof(fenix_callback_func)); - fp->x = recover; - fp->y = callback_data; - __fenix_callback_push( &fenix.callback_list, fp); - } else { - error_code = FENIX_ERROR_UNINITIALIZED; - } - return error_code; + if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; + + fenix.callbacks.push_back(recover); + + return FENIX_SUCCESS; } int __fenix_callback_pop(){ if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; - if(fenix.callback_list == NULL) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; - - fenix_callback_list_t* old_head = fenix.callback_list; - fenix.callback_list = old_head->next; + if(fenix.callbacks.empty()) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; - free(old_head->callback); - free(old_head); + fenix.callbacks.pop_back(); return FENIX_SUCCESS; } void __fenix_callback_invoke_all(int error) { - fenix_callback_list_t *current = fenix.callback_list; - while (current != NULL) { - (current->callback->x)((MPI_Comm) fenix.new_world, error, - (void *) current->callback->y); - current = current->next; - } -} - -void __fenix_callback_push(fenix_callback_list_t **head, fenix_callback_func *fp) -{ - fenix_callback_list_t *callback = (fenix_callback_list_t *) malloc(sizeof(fenix_callback_list_t)); - callback->callback = fp; - callback->next = *head; - *head = callback; -} - -int __fenix_callback_destroy(fenix_callback_list_t *callback_list) -{ - int error_code = FENIX_SUCCESS; - - if ( fenix.fenix_init_flag ) { - - fenix_callback_list_t *current = callback_list; - - while (current != NULL) { - fenix_callback_list_t *old; - old = current; - current = current->next; - free( old->callback ); - free( old ); - } - - } else { - 
error_code = FENIX_ERROR_UNINITIALIZED; + for(auto it = fenix.callbacks.rbegin(); it != fenix.callbacks.rend(); it++){ + (*it)(*fenix.user_world, error); } - - return error_code; } - diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 18d7fea..f785d15 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -785,13 +785,11 @@ void __fenix_finalize() free(fenix.fail_world); } - /* Free Callbacks */ - __fenix_callback_destroy( fenix.callback_list ); - /* Free data recovery interface */ __fenix_data_recovery_destroy( fenix.data_recovery ); - fenix.fenix_init_flag = 0; + /* Free up any C++ data structures, reset default variables */ + fenix = {}; } void __fenix_finalize_spare() @@ -823,13 +821,11 @@ void __fenix_finalize_spare() MPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_ARE_FATAL); MPI_Comm_free(fenix.world); - /* Free callbacks */ - __fenix_callback_destroy( fenix.callback_list ); - /* Free data recovery interface */ __fenix_data_recovery_destroy( fenix.data_recovery ); - fenix.fenix_init_flag = 0; + /* Free up any C++ data structures, reset default variables */ + fenix = {}; /* Future version do not close MPI. Jump to where Fenix_Finalize is called. 
*/ MPI_Finalize(); From f7fcb39a211a80179fccd939e64822b3b6a50a72 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Fri, 25 Apr 2025 10:12:20 -0500 Subject: [PATCH 04/21] Add Fenix::CommException --- include/fenix.hpp | 11 +++ include/fenix_exception.hpp | 74 +++++++++++++++ src/CMakeLists.txt | 1 + src/fenix_exception.cpp | 12 +++ test/CMakeLists.txt | 1 + test/exception_throw/CMakeLists.txt | 15 ++++ test/exception_throw/fenix_exceptions.cpp | 104 ++++++++++++++++++++++ 7 files changed, 218 insertions(+) create mode 100644 include/fenix_exception.hpp create mode 100644 src/fenix_exception.cpp create mode 100644 test/exception_throw/CMakeLists.txt create mode 100644 test/exception_throw/fenix_exceptions.cpp diff --git a/include/fenix.hpp b/include/fenix.hpp index ff69498..588c9a4 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -61,6 +61,7 @@ #include #include #include "fenix.h" +#include "fenix_exception.hpp" /** * @brief As the C-style callback, but accepts an std::function and does not use the void* pointer. @@ -71,4 +72,14 @@ */ int Fenix_Callback_register(std::function callback); +/** + * @brief Registers a callback that throws a CommException + * + * This means no longjmp will occur, and instead applications + * will continue from their try-catch error handler. + * + * @returnstatus + */ +int register_exception_callback(); + #endif diff --git a/include/fenix_exception.hpp b/include/fenix_exception.hpp new file mode 100644 index 0000000..dfe8d10 --- /dev/null +++ b/include/fenix_exception.hpp @@ -0,0 +1,74 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef FENIX_EXCEPTION_HPP +#define FENIX_EXCEPTION_HPP + +#include +#include + +namespace Fenix { + +struct CommException : public std::exception { + MPI_Comm repaired_comm; + const int fenix_err; + CommException(MPI_Comm comm, int err) : + repaired_comm(comm), fenix_err(err) { }; +}; + +} + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0256344..5a8b7b0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,7 @@ FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*) set (Fenix_SOURCES fenix.cpp +fenix_exception.cpp fenix_opt.cpp fenix_process_recovery.cpp fenix_util.cpp diff --git a/src/fenix_exception.cpp b/src/fenix_exception.cpp new file mode 100644 index 0000000..3ce629a --- /dev/null +++ b/src/fenix_exception.cpp @@ -0,0 +1,12 @@ +#include "fenix_exception.hpp" +#include "fenix.h" + +namespace Fenix { +int register_exception_callback(){ + return Fenix_Callback_register( + [](MPI_Comm repaired_comm, int fen_err){ + throw CommException(repaired_comm, fen_err); + } + ); +} +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c4f2e92..ba6f65c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,3 +5,4 @@ add_subdirectory(request_cancelled) add_subdirectory(no_jump) add_subdirectory(issend) add_subdirectory(failed_spares) +add_subdirectory(exception_throw) diff --git a/test/exception_throw/CMakeLists.txt b/test/exception_throw/CMakeLists.txt new file mode 100644 index 0000000..7cd5a58 --- /dev/null +++ b/test/exception_throw/CMakeLists.txt @@ -0,0 +1,15 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. 
Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +add_executable(fenix_exceptions fenix_exceptions.cpp) +target_link_libraries(fenix_exceptions fenix MPI::MPI_CXX) + +add_test(NAME exception_throw + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_exceptions ${MPIEXEC_POSTFLAGS}) diff --git a/test/exception_throw/fenix_exceptions.cpp b/test/exception_throw/fenix_exceptions.cpp new file mode 100644 index 0000000..92fc9a0 --- /dev/null +++ b/test/exception_throw/fenix_exceptions.cpp @@ -0,0 +1,104 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + volatile int status = 0; + + MPI_Init(&argc, &argv); + + int fenix_role, error; + MPI_Comm res_comm; + MPI_Info info; + MPI_Info_create(&info); + MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_UNHANDLED_MODE", "NO_JUMP"); + Fenix_Init(&fenix_role, MPI_COMM_WORLD, &res_comm, &argc, &argv, 0, 0, info, &error); + + Fenix::register_exception_callback(); + + if(fenix_role == FENIX_ROLE_SURVIVOR_RANK){ + printf("FAILURE: longjmp instead of exception\n"); + status = 1; + } + + if (fenix_role == FENIX_ROLE_INITIAL_RANK) { + int rank; + MPI_Comm_rank(res_comm, &rank); + if(rank == 1) raise(SIGKILL); + + try { + MPI_Barrier(res_comm); + printf("FAILURE: barrier finished without fault\n"); + status = 1; + } catch (Fenix::CommException e){ + 
printf("SUCCESS: caught CommException\n"); + } + } + + Fenix_Finalize(); + MPI_Finalize(); + + return status; +} From d51417eb62599a17c195d8bb1cdc88bdbe17a782 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Fri, 25 Apr 2025 10:16:00 -0500 Subject: [PATCH 05/21] Support data member inquiry functions --- include/fenix.h | 17 +- include/fenix_data_group.hpp | 23 +- include/fenix_data_member.hpp | 26 +- include/fenix_ext.hpp | 3 +- src/fenix.cpp | 4 +- src/fenix_data_group.cpp | 102 ++-- src/fenix_data_member.cpp | 227 ++------- src/fenix_data_policy_in_memory_raid.cpp | 33 +- src/fenix_data_recovery.cpp | 572 ++++------------------- 9 files changed, 236 insertions(+), 771 deletions(-) diff --git a/include/fenix.h b/include/fenix.h index c812ca4..46e7542 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -610,10 +610,23 @@ int Fenix_Data_subset_createv(int num_blocks, int *array_start_offsets, */ int Fenix_Data_subset_delete(Fenix_Data_subset *subset_specifier); -//!@unimplemented Get the number of members in a data group. +/** + * @brief Get the number of members in a data group. 
+ * + * @param[in] group_id The group to query + * @param[out] number_of_members Number of members in the group + */ int Fenix_Data_group_get_number_of_members(int group_id, int *number_of_members); -//!@unimplemented Get member ID based on member index +/** + * @brief Get member ID based on member index + * + * See #Fenix_Data_group_get_number_of_members + * + * @param[in] group_id The group to query + * @param[out] member_id The member id at this index in the group + * @param[in] position The position to check, [0, number_of_members) + */ int Fenix_Data_group_get_member_at_position(int group_id, int *member_id, int position); diff --git a/include/fenix_data_group.hpp b/include/fenix_data_group.hpp index 35c76da..9add516 100644 --- a/include/fenix_data_group.hpp +++ b/include/fenix_data_group.hpp @@ -60,6 +60,7 @@ #include #include "fenix.h" +#include "fenix_ext.hpp" #include "fenix_data_member.hpp" #include "fenix_data_packet.hpp" #include "fenix_util.hpp" @@ -71,6 +72,13 @@ typedef struct __fenix_group_vtbl fenix_group_vtbl_t; typedef struct __fenix_group fenix_group_t; +namespace Fenix::Data { + +using member_iterator = std::pair; + +} //end namespace Fenix::Data + + //This defines the functions which must be implemented by the group typedef struct __fenix_group_vtbl { int (*group_delete)(fenix_group_t* group); @@ -140,7 +148,12 @@ typedef struct __fenix_group { int timestamp; int depth; int policy_name; - fenix_member_t *member; + std::vector members; + + //Search for id, returning {-1, nullptr} if not found. + Fenix::Data::member_iterator search_member(int id); + //As search_member, but print an error message if id not found. 
+ Fenix::Data::member_iterator find_member(int id); } fenix_group_t; typedef struct __fenix_data_recovery { @@ -172,4 +185,12 @@ int __fenix_search_groupid( int key, fenix_data_recovery_t *dr); int __fenix_find_next_group_position( fenix_data_recovery_t *dr ); +namespace Fenix::Data { + +using group_iterator = std::pair<int, fenix_group_t*>; + +group_iterator find_group(int id, fenix_data_recovery_t *dr = fenix.data_recovery); + +} //end namespace Fenix::Data + #endif // FENIX_DATA_GROUP_H diff --git a/include/fenix_data_member.hpp b/include/fenix_data_member.hpp index e41b3bd..94ef32b 100644 --- a/include/fenix_data_member.hpp +++ b/include/fenix_data_member.hpp @@ -63,42 +63,32 @@ #define __FENIX_DEFAULT_MEMBER_SIZE 512 +typedef struct __fenix_group fenix_group_t; + typedef struct __fenix_member_entry { - int memberid; + int memberid = -1; enum states state; - void *user_data; + void *user_data = nullptr; int datatype_size; int current_count; } fenix_member_entry_t; -typedef struct __fenix_member { - size_t count; - size_t total_size; - fenix_member_entry_t *member_entry; -} fenix_member_t; - typedef struct __member_entry_packet { int memberid; int datatype_size; int current_count; } fenix_member_entry_packet_t; -fenix_member_t *__fenix_data_member_init( ); -void __fenix_data_member_destroy( fenix_member_t *member ) ; - -void __fenix_ensure_member_capacity( fenix_member_t *m ); -void __fenix_ensure_version_capacity_from_member( fenix_member_t *m ); - -fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, +fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* group, int memberid, void* data, int count, int datatype_size); int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank); int __fenix_data_member_recv_metadata(int groupid, int src_rank, fenix_member_entry_packet_t* packet); -int __fenix_search_memberid(fenix_member_t* member, int memberid); -int __fenix_find_next_member_position(fenix_member_t *m); +int
__fenix_search_memberid(fenix_group_t* group, int memberid); -void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t packet, +void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, enum states mystatus); + #endif // FENIX_DATA_MEMBER_H diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 0026325..56df9c0 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -61,9 +61,10 @@ #include #include "fenix.h" #include "fenix_opt.hpp" -#include "fenix_data_group.hpp" #include "fenix_process_recovery.hpp" +typedef struct __fenix_data_recovery fenix_data_recovery_t; + typedef struct { int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init int num_survivor_ranks; // Keeps the global information on the number of survived MPI ranks after failure diff --git a/src/fenix.cpp b/src/fenix.cpp index a383ad6..b4faa96 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -161,11 +161,11 @@ int Fenix_Data_subset_delete(Fenix_Data_subset *subset_specifier) { } int Fenix_Data_group_get_number_of_members(int group_id, int *number_of_members) { - return 0; + return __fenix_get_number_of_members(group_id, number_of_members); } int Fenix_Data_group_get_member_at_position(int group_id, int *member_id, int position) { - return 0; + return __fenix_get_member_at_position(group_id, member_id, position); } int Fenix_Data_group_get_number_of_snapshots(int group_id, int *number_of_snapshots) { diff --git a/src/fenix_data_group.cpp b/src/fenix_data_group.cpp index 1d9e5ff..0b41648 100644 --- a/src/fenix_data_group.cpp +++ b/src/fenix_data_group.cpp @@ -61,7 +61,35 @@ #include "fenix_data_member.hpp" #include "fenix_data_packet.hpp" +namespace Fenix::Data { +group_iterator find_group(int id, fenix_data_recovery_t* dr){ + int index = __fenix_search_groupid(id, dr); + if(index == -1){ + debug_print("ERROR: group_id <%d> does not exist\n", id); + return {index, nullptr}; + } + return {index, dr->group[index]}; +} + 
+} //end namespace Fenix::Data + +using namespace Fenix::Data; + +member_iterator fenix_group_t::search_member(int id){ + for(int i = 0; i < members.size(); i++){ + if(members[i].memberid == id){ + return {i, &(members[i])}; + } + } + return {-1, nullptr}; +} + +member_iterator fenix_group_t::find_member(int id){ + auto it = search_member(id); + if(it.first == -1) debug_print("ERROR group <%d>: member_id <%d> does not exist\n", groupid, id); + return it; +} /** * @brief @@ -85,12 +113,11 @@ fenix_data_recovery_t * __fenix_data_recovery_init() { } int __fenix_member_delete(int groupid, int memberid) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - if(group_index !=-1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; if (fenix.options.verbose == 38) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", @@ -98,38 +125,18 @@ int __fenix_member_delete(int groupid, int memberid) { member_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_delete: memberid <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - fenix_group_t *group = (data_recovery->group[group_index]); - - retval = group->vtbl.member_delete(group, memberid); - - if(retval == FENIX_SUCCESS){ - fenix_member_t *member = group->member; - member->count--; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->state = DELETED; - } + int 
retval = group->vtbl.member_delete(group, memberid); - if (fenix.options.verbose == 38) { - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-state: %d", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, mentry->state); - } + if(retval == FENIX_SUCCESS){ + group->members.erase(group->members.begin()+member_index); + } - retval = FENIX_SUCCESS; + if (fenix.options.verbose == 38) { + verbose_print("c-rank: %d, role: %d, m-count: %zu", + __fenix_get_current_rank(fenix.new_world), fenix.role, + group->members.size()); } + return retval; } @@ -150,8 +157,10 @@ int __fenix_group_delete_direct(fenix_group_t* group){ return group->vtbl.group_delete(group); } -int __fenix_data_recovery_remove_group(fenix_data_recovery_t* data_recovery, int group_index){ +int __fenix_data_recovery_remove_group(int group_index){ int retval = !FENIX_SUCCESS; + auto data_recovery = fenix.data_recovery; + if(group_index != -1){ for(int index = group_index; index < data_recovery->count-1; index++){ data_recovery->group[index] = data_recovery->group[index+1]; @@ -167,28 +176,21 @@ int __fenix_data_recovery_remove_group(fenix_data_recovery_t* data_recovery, int * @param group_id */ int __fenix_group_delete(int groupid) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; if (fenix.options.verbose == 37) { verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), group_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_group_delete: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - /* Delete Process */ - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - fenix_group_t *group = (data_recovery->group[group_index]); 
- retval = __fenix_group_delete_direct(group); - - if(retval == FENIX_SUCCESS){ - retval = __fenix_data_recovery_remove_group(data_recovery, group_index); - } + /* Delete Process */ + int retval = __fenix_group_delete_direct(group); + if(retval == FENIX_SUCCESS){ + retval = __fenix_data_recovery_remove_group(group_index); } + return retval; } diff --git a/src/fenix_data_member.cpp b/src/fenix_data_member.cpp index 7971d85..b5a34da 100644 --- a/src/fenix_data_member.cpp +++ b/src/fenix_data_member.cpp @@ -62,196 +62,58 @@ #include "fenix_data_packet.hpp" -/** - * @brief - */ -fenix_member_t *__fenix_data_member_init() { - fenix_member_t *member = (fenix_member_t *) - s_calloc(1, sizeof(fenix_member_t)); - member->count = 0; - member->total_size = __FENIX_DEFAULT_MEMBER_SIZE; - member->member_entry = (fenix_member_entry_t *) s_malloc( - __FENIX_DEFAULT_MEMBER_SIZE * sizeof(fenix_member_entry_t)); - - if (fenix.options.verbose == 42) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, - member->total_size); - } - - int member_index; - for (member_index = 0; member_index < - __FENIX_DEFAULT_MEMBER_SIZE; member_index++) { // insert default values - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = EMPTY; - - if (fenix.options.verbose == 42) { - verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - mentry->memberid, mentry->state); - } - } - return member; -} - -void __fenix_data_member_destroy( fenix_member_t *member ) { - free( member->member_entry ); - free( member ); -} +using namespace Fenix::Data; /** * @brief * @param * @param */ -int __fenix_search_memberid(fenix_member_t* member, int key) { - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - int member_index, found = -1, index = -1; - for (member_index = 0; - (found != 1) && 
(member_index < member->total_size); member_index++) { - - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - if (!(mentry->state == EMPTY || mentry->state == DELETED) && key == mentry->memberid) { - index = member_index; - found = 1; - } - } - return index; +int __fenix_search_memberid(fenix_group_t* group, int key) { + return group->search_member(key).first; } -/** - * @brief - * @param - */ -int __fenix_find_next_member_position(fenix_member_t *member) { - __fenix_ensure_member_capacity(member); - - int member_index, found = -1, index = -1; - for (member_index = 0; - (found != 1) && (member_index < member->total_size); member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - if (mentry->state == EMPTY || mentry->state == DELETED) { - index = member_index; - found = 1; - } - } - return index; -} - -fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, +fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* group, int memberid, void* data, int count, int datatype_size){ - - int member_index = __fenix_find_next_member_position(member); - fenix_member_entry_t* mentry = member->member_entry + member_index; - - mentry->memberid = memberid; - mentry->state = OCCUPIED; - mentry->user_data = data; - mentry->current_count = count; - mentry->datatype_size = datatype_size; - - member->count++; - - return mentry; + fenix_member_entry_t mentry; + mentry.memberid = memberid; + mentry.state = OCCUPIED; + mentry.user_data = data; + mentry.current_count = count; + mentry.datatype_size = datatype_size; + group->members.push_back(mentry); + + return &group->members.back(); } -/** - * @brief - * @param - */ -void __fenix_ensure_member_capacity(fenix_member_t *m) { - fenix_member_t *member = m; - if (member->count +1 >= member->total_size) { - int start_index = member->total_size; - member->member_entry = (fenix_member_entry_t *) s_realloc(member->member_entry, - (member->total_size * 2) * - 
sizeof(fenix_member_entry_t)); - member->total_size = member->total_size * 2; - - if (fenix.options.verbose == 52) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, member->total_size); - } - - int member_index; - for (member_index = start_index; member_index < member->total_size; member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = EMPTY; - - if (fenix.options.verbose == 52) { - verbose_print( - "c-rank: %d, role: %d, member[%d] m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member_index, mentry->memberid, mentry->state); - } - } - } -} - - int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank){ - int retval = -1; - - fenix_data_recovery_t* data_recovery = fenix.data_recovery; - int group_index = __fenix_search_groupid(groupid, data_recovery); - int member_index; - if(group_index != -1){ - member_index = __fenix_search_memberid( - data_recovery->group[group_index]->member, memberid); - } - - if(group_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if(member_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: memberid <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = data_recovery->group[group_index]; - fenix_member_entry_t mentry = group->member->member_entry[member_index]; - - fenix_member_entry_packet_t packet; - packet.memberid = mentry.memberid; - packet.datatype_size = mentry.datatype_size; - packet.current_count = mentry.current_count; + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; - MPI_Send(&packet, sizeof(packet), MPI_BYTE, dest_rank, RECOVER_MEMBER_ENTRY_TAG^groupid, - group->comm); + 
auto [member_index, member] = group->find_member(memberid); + if(!member) return FENIX_ERROR_INVALID_MEMBERID; - retval = FENIX_SUCCESS; - } + fenix_member_entry_packet_t packet; + packet.memberid = member->memberid; + packet.datatype_size = member->datatype_size; + packet.current_count = member->current_count; - return retval; + MPI_Send(&packet, sizeof(packet), MPI_BYTE, dest_rank, RECOVER_MEMBER_ENTRY_TAG^groupid, + group->comm); + + return FENIX_SUCCESS; } int __fenix_data_member_recv_metadata(int groupid, int src_rank, fenix_member_entry_packet_t* packet){ - int retval = -1; - - fenix_data_recovery_t* data_recovery = fenix.data_recovery; - int group_index = __fenix_search_groupid(groupid, data_recovery); - - if(group_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t* group = data_recovery->group[group_index]; - - MPI_Recv((void*)packet, sizeof(fenix_member_entry_packet_t), MPI_BYTE, src_rank, - RECOVER_MEMBER_ENTRY_TAG^groupid, group->comm, NULL); + auto group = find_group(groupid).second; + if(!group) return FENIX_ERROR_INVALID_GROUPID; - retval = FENIX_SUCCESS; - } - + MPI_Recv((void*)packet, sizeof(fenix_member_entry_packet_t), MPI_BYTE, src_rank, + RECOVER_MEMBER_ENTRY_TAG^groupid, group->comm, NULL); - return retval; + return FENIX_SUCCESS; } @@ -260,32 +122,7 @@ int __fenix_data_member_recv_metadata(int groupid, int src_rank, * @param * @param */ -void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t packet, +void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, enum states mystatus) { - fenix_member_t *member = m; - int start_index = member->total_size; - member->count = 0; - member->total_size = packet.total_size; - member->member_entry = (fenix_member_entry_t *) s_realloc(member->member_entry, - (member->total_size) * - sizeof(fenix_member_entry_t)); - if 
(fenix.options.verbose == 50) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, member->total_size); - } - - int member_index; - /* Why start_index is set to the number of member entries ? */ - // for (member_index = start_index; member_index < member->size; member_index++) { - for (member_index = 0; member_index < member->total_size; member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = mystatus; - if (fenix.options.verbose == 50) { - verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - mentry->memberid, mentry->state); - } - } + group->members.clear(); } diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 70e0d1b..940c2c6 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -145,7 +145,7 @@ void __imr_undo_restore(MPI_Comm comm, int err, void* data){ void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, void* policy_value, int* flag){ - *group = (fenix_group_t *)malloc(sizeof(fenix_imr_group_t)); + *group = (fenix_group_t *) new fenix_imr_group_t; fenix_imr_group_t *new_group = (fenix_imr_group_t *)(*group); new_group->base.vtbl.group_delete = *__imr_group_delete; new_group->base.vtbl.member_create = *__imr_member_create; @@ -453,8 +453,8 @@ int __imr_member_store(fenix_group_t* g, int member_id, fenix_member_entry_t* member_data; //Shouldn't need to check for failure to find the member, that should be done before //calling - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = &(group->base.member->member_entry[member_data_index]); + int member_data_index = __fenix_search_memberid(&group->base, member_id); + member_data = 
&(group->base.members[member_data_index]); if(found_member != FENIX_SUCCESS){ debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist on rank <%d>!\n", @@ -719,8 +719,8 @@ int __imr_member_restore(fenix_group_t* g, int member_id, fenix_member_entry_t member_data; if(found_member){ - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; + int member_data_index = __fenix_search_memberid(&group->base, member_id); + member_data = group->base.members[member_data_index]; } int recovery_locally_possible; @@ -801,8 +801,8 @@ int __imr_member_restore(fenix_group_t* g, int member_id, Fenix_Callback_register(__imr_undo_restore, (void*)undo_data); __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; + int member_data_index = __fenix_search_memberid(&group->base, member_id); + member_data = group->base.members[member_data_index]; mentry->current_head = group->num_snapshots; @@ -899,7 +899,7 @@ int __imr_member_restore(fenix_group_t* g, int member_id, __fenix_data_member_recv_metadata(group->base.groupid, group->partners[my_set_rank==0 ? 1 : 0], &packet); //We remake the new member just like the user would. - __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, + __fenix_member_create(group->base.groupid, member_id, NULL, packet.current_count, packet.datatype_size); //Mark the member for deletion if another failure interrupts recovering fully. 
@@ -910,8 +910,8 @@ int __imr_member_restore(fenix_group_t* g, int member_id, __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; + int member_data_index = __fenix_search_memberid(&group->base, member_id); + member_data = group->base.members[member_data_index]; MPI_Recv((void*)&(group->num_snapshots), 1, MPI_INT, (my_set_rank==0 ? 1 : 0), @@ -1077,8 +1077,8 @@ int __imr_member_lrestore(fenix_group_t* g, int member_id, return FENIX_ERROR_INVALID_MEMBERID; } - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - fenix_member_entry_t member_data = group->base.member->member_entry[member_data_index]; + int member_data_index = __fenix_search_memberid(&group->base, member_id); + fenix_member_entry_t member_data = group->base.members[member_data_index]; @@ -1236,21 +1236,18 @@ int __imr_get_redundant_policy(fenix_group_t* group, int* policy_name, policy_vals[1] = full_group->rank_separation; *flag = FENIX_SUCCESS; - return retval; + return retval; } int __imr_group_delete(fenix_group_t* g){ fenix_imr_group_t* group = (fenix_imr_group_t*) g; - for(int entry = 0; entry < group->base.member->count; entry++){ + for(int entry = 0; entry < group->base.members.size(); entry++){ __imr_member_free(group->entries+entry, g->depth); } free(group->entries); - //We have the responsibility of destroying the member array in the base group struct. 
- __fenix_data_member_destroy(group->base.member); - free(group->partners); - free(group); + delete group; return FENIX_SUCCESS; } diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 914d1b0..b28da79 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -64,6 +64,8 @@ #include +using namespace Fenix::Data; + /** * @brief create new group or recover group data for lost processes * @param groud_id @@ -130,7 +132,6 @@ int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, group->timestart = timestart; group->timestamp = -1; //indicates no commits yet group->depth = depth; - group->member = __fenix_data_member_init(); group->comm = comm; MPI_Comm_rank(comm, &(group->current_rank)); @@ -190,10 +191,15 @@ int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* poli * @param data_type */ int __fenix_member_create(int groupid, int memberid, void *data, int count, int datatype_size ) { - int retval = -1; - int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); - int member_index = -1; - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->search_member(memberid); + if(mentry){ + debug_print("ERROR Fenix_Data_member_create: member_id <%d> already exists\n", + memberid); + return FENIX_ERROR_INVALID_MEMBERID; + } if (fenix.options.verbose == 13) { verbose_print("c-rank: %d, group_index: %d, member_index: %d\n", @@ -201,31 +207,12 @@ int __fenix_member_create(int groupid, int memberid, void *data, int count, int group_index, member_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_create: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index != -1) { - debug_print("ERROR 
Fenix_Data_member_create: member_id <%d> already exists\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; + //First, we'll make a fenix-core member entry, then pass that info to + //the specific data policy. + mentry = __fenix_data_member_add_entry(group, memberid, data, count, datatype_size); - } else { - - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - - //First, we'll make a fenix-core member entry, then pass that info to - //the specific data policy. - fenix_member_entry_t* mentry; - mentry = __fenix_data_member_add_entry(member, memberid, data, count, datatype_size); - - //Pass the info along to the policy - retval = group->vtbl.member_create(group, mentry); - - } - return retval; - /* No Potential Bug in 2/10/17 */ + //Pass the info along to the policy + return group->vtbl.member_create(group, mentry); } @@ -289,7 +276,7 @@ int __fenix_member_store(int groupid, int memberid, Fenix_Data_subset specifier) /* Check if the member id already exists. If so, the index of the storage space is assigned */ if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); + member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid ); } if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { @@ -329,7 +316,7 @@ int __fenix_member_istore(int groupid, int memberid, Fenix_Data_subset specifier /* Check if the member id already exists. 
If so, the index of the storage space is assigned */ if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); + member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid ); } if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { @@ -355,182 +342,6 @@ int __fenix_member_istore(int groupid, int memberid, Fenix_Data_subset specifier -void __fenix_subset(fenix_group_t *group, fenix_member_entry_t *me, Fenix_Data_subset *ss) { -#if 1 - fprintf(stderr,"ERROR Fenix_Subset is not currently supported\n"); - -#else - fenix_version_t *version = &(me->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - - int i; - MPI_Status status; - - /* Store the local data */ - /* This version does not apply any storage saving scheme */ - memcpy(lentry->data, lentry->pdata, (lentry->count * lentry->size)); - - /* Check the subset */ - int subset_total_size = 0; - for( i = 0; i < ss->num_blocks; i++ ) { - int subset_start = ss->start_offset[i]; - int subset_end = ss->start_offset[i]; - int subset_stride = ss->start_offset[i]; - - } - subset_total_size = ss->num_blocks * ss->fblk_size; - - /* Create a buffer for sending data (lentry->size is a size of single element ) */ - void *subset_data = (void *) s_malloc(me->datatype_size * subset_total_size ); - - - /* This data exchange is not necessary when using non-v call */ - member_store_packet_t lentry_packet, rentry_packet; - lentry_packet.rank = lentry->currentrank; - lentry_packet.datatype = lentry->datatype; - lentry_packet.entry_count = lentry->count; - lentry_packet.entry_size = subset_total_size; - - int current_rank = __fenix_get_current_rank(fenix.new_world); - int current_role = fenix.role; - - MPI_Sendrecv(&lentry_packet, 
sizeof(member_store_packet_t), MPI_BYTE, ge->out_rank, - STORE_SIZE_TAG, &rentry_packet, sizeof(member_store_packet_t), MPI_BYTE, - ge->in_rank, STORE_SIZE_TAG, (ge->comm), &status); - - rentry->remoterank = rentry_packet.rank; - rentry->datatype = rentry_packet.datatype; - rentry->count = rentry_packet.entry_count; - rentry->size = rentry_packet.entry_size; - - if (rentry->data != NULL) { - rentry->data = s_malloc(rentry->count * rentry->size); - } - - /* Partner is sending subset */ - if( rentry->size != rentry->count ) { - /* Receive # of blocks */ - - } - /* Handle Subset */ - int subset_num_blocks = ss->num_blocks; - int subset_start = ss->start_offsets[0]; - int subset_end = ss->end_offsets[0]; - int subset_stride = ss->stride; - int subset_diff = subset_end - subset_start; - int subset_count = subset_num_blocks * subset_diff; - - int subset_block = 0; - int subset_index = 0; - void *subset_data = (void *) s_malloc(sizeof(void) * me->current_count); - - int data_index; - int data_steps = 0; - int data_count = me->current_count; - for (data_index = subset_start; (subset_block != subset_num_blocks - 1) && - (data_index < data_count); data_index++) { - if (data_steps != subset_diff) { - MPI_Sendrecv((lentry->data) + data_index, (1 * lentry->size), MPI_BYTE, ge->out_rank, - STORE_DATA_TAG, (rentry->data) + data_index, (1 * rentry->size), MPI_BYTE, - ge->in_rank, STORE_DATA_TAG, ge->comm, &status); - // memcpy((subset_data) + data_index, (me->current_buf) + data_index, sizeof(me->current_datatype)); - data_steps = data_steps + 1; - } else if (data_steps == subset_diff) { - data_steps = 0; - subset_block = subset_block + 1; - data_index = data_index + subset_stride - 1; - } - } - - /* Need to update the version info */ - if (version->position < version->size - 1) { - version->num_copies++; - version->position++; - } else { /* Back to 0 */ - version->position = 0; - } -#endif -} - - -#if 0 -/** - * @brief - * @param group_id - * @param member_id - * @param 
subset_specifier - */ -int __fenix_member_storev(int group_id, int member_id, Fenix_Data_subset subset_specifier) { - -/* - * Using the same routine for v and non-v routine. - */ - int retval = -1; - int group_index = __fenix_search_groupid( group_id, fenix.data_recovery ); - int member_index = __fenix_search_memberid(group_index, member_id); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_storev: group_id <%d> does not exist\n", - group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_storev: member_id <%d> does not exist\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = fenix.data_recovery; - fenix_group_entry_t *gentry = &(group->group_entry[group_index]); - fenix_member_t *member = &(gentry->member); - __fenix_ensure_version_capacity(member); - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - fenix_version_t *version = &(mentry->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - retval = FENIX_SUCCESS; - } - return retval; - -} -#endif - -#if 0 -/** - * @brief - * @param group_id - * @param member_id - * @param subset_specifier - * @param request - */ -int __fenix_member_istorev(int group_id, int member_id, Fenix_Data_subset subset_specifier, - Fenix_Request *request) { - - int retval = -1; - int group_index = __fenix_search_groupid(group_id, __fenixi_g_data_recovery ); - int member_index = __fenix_search_memberid(group_index, member_id); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_istorev: group_id <%d> does not exist\n", - group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_istorev: member_id <%d> does not exist\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t 
*group = fenix.data_recovery; - fenix_group_entry_t *gentry = &(group->group_entry[group_index]); - fenix_member_t *member = &(gentry->member); - __fenix_ensure_version_capacity(member); - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - fenix_version_t *version = &(mentry->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - retval = FENIX_SUCCESS; - } - - return retval; -} -#endif - /** * @brief * @param group_id @@ -635,7 +446,7 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); int member_index = -1; - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); if (fenix.options.verbose == 25) { @@ -669,7 +480,7 @@ int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); int member_index = -1; - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); if (fenix.options.verbose == 25) { @@ -704,7 +515,7 @@ int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buf int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); int member_index = -1; - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); if (fenix.options.verbose == 25) { verbose_print("c-rank: %d, role: %d, 
group_index: %d, member_index: %d\n", @@ -732,17 +543,11 @@ int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buf * @param num_members */ int __fenix_get_number_of_members(int group_id, int *num_members) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - *num_members = group->member->count; - retval = FENIX_SUCCESS; - } - return retval; + auto group = find_group(group_id).second; + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + *num_members = group->members.size(); + return FENIX_SUCCESS; } /** @@ -752,27 +557,18 @@ int __fenix_get_number_of_members(int group_id, int *num_members) { * @param position */ int __fenix_get_member_at_position(int group_id, int *member_id, int position) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - if (position < 0 || position > (member->total_size) - 1) { - debug_print( - "ERROR Fenix_Data_group_get_member_at_position: position <%d> must be a value between 0 and number_of_members-1 \n", - position); - retval = FENIX_ERROR_INVALID_POSITION; - } else { - int member_index = ((member->total_size) - 1) - position; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - *member_id = mentry->memberid; - retval = FENIX_SUCCESS; - } + auto [group_index, group] = find_group(group_id); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + if(position < 0 || position > group->members.size()){ + debug_print( + 
"ERROR Fenix_Data_group_get_member_at_position: position <%d> must be a value between 0 and number_of_members-1 \n", + position); + return FENIX_ERROR_INVALID_POSITION; } - return retval; + + *member_id = group->members[position].memberid; + return FENIX_SUCCESS; } /** @@ -827,37 +623,20 @@ int __fenix_get_snapshot_at_position(int groupid, int position, int *timestamp) */ int __fenix_member_get_attribute(int groupid, int memberid, int attributename, void *attributevalue, int *flag, int sourcerank) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; - if(group_index != -1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; if (fenix.options.verbose == 34) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_get: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_get: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - int retval = group->vtbl.member_get_attribute(group, mentry, attributename, - attributevalue, flag, sourcerank); - - } - return retval; + return group->vtbl.member_get_attribute(group, mentry, attributename, + attributevalue, flag, sourcerank); } /** @@ -870,74 +649,60 @@ int __fenix_member_get_attribute(int 
groupid, int memberid, int attributename, */ int __fenix_member_set_attribute(int groupid, int memberid, int attributename, void *attributevalue, int *flag) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; - if(group_index != -1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } - if (fenix.options.verbose == 35) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_set: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_set: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - int my_datatype_size; - int myerr; - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - - //Always pass attribute changes along to group - they might have unknown attributes - //or side-effects to handle from changes. They get change info before - //changes are made, in case they need prior state. - retval = group->vtbl.member_set_attribute(group, mentry, attributename, - attributevalue, flag); + int my_datatype_size; + int myerr; + + //Always pass attribute changes along to group - they might have unknown attributes + //or side-effects to handle from changes. They get change info before + //changes are made, in case they need prior state. 
+ int retval = group->vtbl.member_set_attribute(group, mentry, attributename, + attributevalue, flag); + + switch (attributename) { + case FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER: + mentry->user_data = attributevalue; + break; + case FENIX_DATA_MEMBER_ATTRIBUTE_COUNT: + mentry->current_count = *((int *) (attributevalue)); + retval = FENIX_SUCCESS; + break; + case FENIX_DATA_MEMBER_ATTRIBUTE_DATATYPE: + + myerr = MPI_Type_size(*((MPI_Datatype *)(attributevalue)), &my_datatype_size); + + if( myerr ) { + debug_print( + "ERROR Fenix_Data_member_attr_get: Fenix currently does not support this MPI_DATATYPE; invalid attribute_value <%d>\n", + attributevalue); + retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; + } + + mentry->datatype_size = my_datatype_size; + retval = FENIX_SUCCESS; + break; - switch (attributename) { - case FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER: - mentry->user_data = attributevalue; - break; - case FENIX_DATA_MEMBER_ATTRIBUTE_COUNT: - mentry->current_count = *((int *) (attributevalue)); - retval = FENIX_SUCCESS; - break; - case FENIX_DATA_MEMBER_ATTRIBUTE_DATATYPE: - - myerr = MPI_Type_size(*((MPI_Datatype *)(attributevalue)), &my_datatype_size); - - if( myerr ) { - debug_print( - "ERROR Fenix_Data_member_attr_get: Fenix currently does not support this MPI_DATATYPE; error %d\n", myerr - ); - retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; - } - - mentry->datatype_size = my_datatype_size; - retval = FENIX_SUCCESS; - break; - - default: - //Only an issue if the policy also doesn't have this attribute. - if(retval){ - debug_print("ERROR Fenix_Data_member_attr_get: invalid attribute_name <%d>\n", - attributename); - retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; - } - break; - } + default: + //Only an issue if the policy also doesn't have this attribute. 
+ if(retval){ + debug_print("ERROR Fenix_Data_member_attr_get: invalid attribute_name <%d>\n", + attributename); + retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; + } + break; } + return retval; } @@ -964,164 +729,3 @@ int __fenix_snapshot_delete(int group_id, int time_stamp) { } return retval; } - -///////////////////////////////////////////////////// TODO // - -void __fenix_store_single() { - - -} - -#if 0 //This needs to be reworked for the new data redundancy framework. - //Lots of info about member versions etc. has been moved to policy-specific - //data. -/** - * - */ -void __feninx_dr_print_store() { - int group, member, version, local, remote; - fenix_data_recovery_t *current = fenix.data_recovery; - int group_count = current->count; - for (group = 0; group < group_count; group++) { - int member_count = current->group[group]->member->count; - for (member = 0; member < member_count; member++) { - int version_count = current->group[group]->member->member_entry[member].version->count; - for (version = 0; version < version_count; version++) { - int local_data_count = current->group[group]->member->member_entry[member].version->local_entry[version].count; - int *local_data = current->group[group]->member->member_entry[member].version->local_entry[version].data; - for (local = 0; local < local_data_count; local++) { - //printf("*** store rank[%d] group[%d] member[%d] local[%d]: %d\n", - //get_current_rank(fenix.new_world), group, member, local, - //local_data[local]); - } - int remote_data_count = current->group[group]->member->member_entry[member].version->remote_entry[version].count; - int *remote_data = current->group[group]->member->member_entry[member].version->remote_entry[version].data; - for (remote = 0; remote < remote_data_count; remote++) { - printf("*** store rank[%d] group[%d] member[%d] remote[%d]: %d\n", - __fenix_get_current_rank(fenix.new_world), group, member, remote, - remote_data[remote]); - } - } - } - } -} - -/** - * - */ - -void 
__fenix_dr_print_restore() { - fenix_data_recovery_t *current = fenix.data_recovery; - int group_count = current->count; - int member_count = current->group[0]->member->count; - int version_count = current->group[0]->member->member_entry[0].version->count; - int local_data_count = current->group[0]->member->member_entry[0].version->local_entry[0].count; - int remote_data_count = current->group[0]->member->member_entry[0].version->remote_entry[0].count; - printf("*** restore rank: %d; group: %d; member: %d; local: %d; remote: %d\n", - __fenix_get_current_rank(fenix.new_world), group_count, member_count, - local_data_count, - remote_data_count); -} - -/** - * - */ -void __fenix_dr_print_datastructure() { - int group_index, member_index, version_index, remote_data_index, local_data_index; - fenix_data_recovery_t *current = fenix.data_recovery; - - if (!current) { - return; - } - - printf("\n\ncurrent_rank: %d\n", __fenix_get_current_rank(fenix.new_world)); - int group_size = current->total_size; - for (group_index = 0; group_index < group_size; group_index++) { - int depth = current->group[group_index]->depth; - int groupid = current->group[group_index]->groupid; - int timestamp = current->group[group_index]->timestamp; - int group_state = current->group[group_index]->state; - int member_size = current->group[group_index]->member->total_size; - int member_count = current->group[group_index]->member->count; - switch (group_state) { - case EMPTY: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - group_index, depth, groupid, timestamp, "EMPTY", member_size, - member_count); - break; - case OCCUPIED: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - group_index, depth, groupid, timestamp, "OCCUPIED", member_size, - member_count); - break; - case DELETED: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - 
group_index, depth, groupid, timestamp, "DELETED", member_size, - member_count); - break; - default: - break; - } - - for (member_index = 0; member_index < member_size; member_index++) { - int memberid = current->group[group_index]->member->member_entry[member_index].memberid; - int member_state = current->group[group_index]->member->member_entry[member_index].state; - int version_size = current->group[group_index]->member->member_entry[member_index].version->total_size; - int version_count = current->group[group_index]->member->member_entry[member_index].version->count; - switch (member_state) { - case EMPTY: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "EMPTY", version_size, - version_count); - break; - case OCCUPIED: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "OCCUPIED", version_size, - version_count); - break; - case DELETED: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "DELETED", version_size, - version_count); - break; - default: - break; - } - - for (version_index = 0; version_index < version_size; version_index++) { - int local_data_count = current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].count; - printf("group[%d] member[%d] version[%d] local_data.count: %d\n", - group_index, - member_index, - version_index, local_data_count); - if (current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].data != - NULL) { - int *current_local_data = (int *) current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].data; - for (local_data_index = 0; local_data_index < local_data_count; local_data_index++) { - printf("group[%d] member[%d] depth[%d] local_data[%d]: %d\n", - group_index, - 
member_index, - version_index, local_data_index, - current_local_data[local_data_index]); - } - } - - int remote_data_count = current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].count; - printf("group[%d] member[%d] version[%d] remote_data.count: %d\n", - group_index, - member_index, version_index, remote_data_count); - if (current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].data != - NULL) { - int *current_remote_data = current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].data; - for (remote_data_index = 0; remote_data_index < remote_data_count; remote_data_index++) { - printf("group[%d] member[%d] depth[%d] remote_data[%d]: %d\n", - group_index, - member_index, version_index, remote_data_index, - current_remote_data[remote_data_index]); - } - } - } - } - } -} -#endif From 945ee686d61dfb46dccd80ce2d978d7372b34fc6 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Fri, 25 Apr 2025 11:44:47 -0500 Subject: [PATCH 06/21] Expand Fenix config options --- include/fenix.h | 43 +++-- include/fenix.hpp | 10 -- include/fenix_ext.hpp | 57 +++--- include/fenix_init.h | 2 +- include/fenix_opt.hpp | 2 +- include/fenix_process_recovery.hpp | 8 +- src/CMakeLists.txt | 1 - src/fenix_exception.cpp | 12 -- src/fenix_process_recovery.cpp | 206 +++++++++------------- test/exception_throw/fenix_exceptions.cpp | 5 +- test/issend/fenix_issend_test.c | 2 +- test/no_jump/fenix_no_jump_test.c | 2 +- 12 files changed, 152 insertions(+), 198 deletions(-) delete mode 100644 src/fenix_exception.cpp diff --git a/include/fenix.h b/include/fenix.h index 46e7542..67be4b5 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -146,6 +146,30 @@ typedef enum { FENIX_ROLE_SURVIVOR_RANK = 2 } Fenix_Rank_role; +/** + * @brief Options for passing control back to application after recovery. 
+ */ +typedef enum { + //!Return to Fenix_Init via longjmp (default) + JUMP, + //!Return the error code inline + RETURN, + //!Throw a Fenix::CommException + THROW +} Fenix_Resume_mode; + +/** + * @brief Options for dealing with 'unhandled' errors, e.g. invalid rank IDs + */ +typedef enum { + //!Ignore unhandled errors + SILENT, + //!Print error and continue without handling + PRINT, + //!Print error and abort Fenix's world (default) + ABORT +} Fenix_Unhandled_mode; + /** * @fn void Fenix_Init(int* role, MPI_Comm comm, MPI_Comm* newcomm, int** argc, char*** argv, int spare_ranks, int spawn, MPI_Info info, int* error); * @brief Build a resilient communicator and set the restart point. @@ -197,14 +221,13 @@ typedef enum { * @param[in] spawn *Unimplemented*: Whether to enable spawning new ranks to replace * failed ranks when spares are unavailable. * @param[in] info Fenix recovery configuration parameters, may be MPI_INFO_NULL - * Supports the "FENIX_RESUME_MODE" key, used to indicate where execution should resume upon + * "FENIX_RESUME_MODE" key is used to indicate where execution should resume upon * rank failure for all active (non-spare) ranks in any resilient communicators, not only for - * those ranks in communicators that failed. The following values associated with the - * "resume_mode" key are supported: - * - "Fenix_init" (default): execution resumes at logical exit of Fenix_Init. - * - "NO_JUMP": execution continues from the failing MPI call. Errors are otherwise handled - * as normal, but return the error code as well. Applications should typically - * either check for return codes or assign an error callback through Fenix. + * those ranks in communicators that failed. The value should be a string with the name of a + * Fenix_Resume_mode enum value. + * "FENIX_UNHANDLED_MODE" key is used to indicate how Fenix should handle error values + * returned by MPI functions that are unrelated to failed processes. 
The value should be + * a string with the name of a Fenix_Unhandled_mode enum value. * @param[out] error The return status of \c Fenix_Init
* Used to signal that a non-fatal error or special condition was encountered in the execution of * Fenix_Init, or FENIX_SUCCESS otherwise. It has the same value across all ranks released by @@ -221,10 +244,8 @@ typedef enum { *(_role) = __fenix_preinit(_role, _comm, _newcomm, _argc, \ _argv, _spare_ranks, _spawn, _info, \ _error, &bufjmp); \ - if(setjmp(bufjmp)) { \ - *(_role) = FENIX_ROLE_SURVIVOR_RANK; \ - } \ - __fenix_postinit( _error ); \ + setjmp(bufjmp); \ + __fenix_postinit(); \ } diff --git a/include/fenix.hpp b/include/fenix.hpp index 588c9a4..0234fe3 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -72,14 +72,4 @@ */ int Fenix_Callback_register(std::function callback); -/** - * @brief Registers a callback that throws a CommException - * - * This means no longjmp will occur, and instead applications - * will continue from their try-catch error handler. - * - * @returnstatus - */ -int register_exception_callback(); - #endif diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 56df9c0..f140d90 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -66,49 +66,44 @@ typedef struct __fenix_data_recovery fenix_data_recovery_t; typedef struct { - int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init - int num_survivor_ranks; // Keeps the global information on the number of survived MPI ranks after failure - int num_recovered_ranks; // Keeps the number of spare ranks brought into MPI communicator recovery - int resume_mode; // Defines how program resumes after process recovery - int spawn_policy; // Indicate dynamic process spawning - int spare_ranks; // Spare ranks entered by user to repair failed ranks - int repair_result; // Internal global variable to store the result of MPI communicator repair - int finalized; + int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init + int num_survivor_ranks = 0; // Keeps the global information on the number of survived MPI ranks after failure + int 
num_recovered_ranks = 0; // Keeps the number of spare ranks brought into MPI communicator recovery + int spare_ranks; // Spare ranks entered by user to repair failed ranks + + int resume_mode = Fenix_Resume_mode::JUMP; + int unhandled_mode = Fenix_Unhandled_mode::ABORT; + int ignore_errs = false; // Temporarily ignore all errors & recovery + int spawn_policy; // Indicate dynamic process spawning jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure + int repair_result = FENIX_SUCCESS; // Internal variable to store the result of MPI comm repair + int role = FENIX_ROLE_INITIAL_RANK; - //enum FenixRankRole role; // Role of rank: initial, survivor or repair - int role; // Role of rank: initial, survivor or repair - int fenix_init_flag = 0; + int fenix_init_flag = false; + int finalized = false; - int fail_world_size; - int* fail_world; + int fail_world_size = 0; + int* fail_world = nullptr; //Save the pointer to role and error of Fenix_Init - int *ret_role; - int *ret_error; + int *ret_role = nullptr; + int *ret_error = nullptr; std::vector callbacks; - fenix_debug_opt_t options; // This is reserved to store the user options + fenix_debug_opt_t options; // This is reserved to store the user options - MPI_Comm *world; // Duplicate of the MPI communicator provided by user - MPI_Comm new_world; // Global MPI communicator identical to g_world but without spare ranks - MPI_Comm *user_world; // MPI communicator with repaired ranks - //Manage state of the comms. 
Necessary when failures happen rapidly, mussing up state - int new_world_exists, user_world_exists; - + MPI_Comm *world; // Duplicate of comm provided by user + MPI_Comm *user_world; // User-facing comm with repaired ranks and no spares + MPI_Comm new_world; // Internal duplicate of user_world + int new_world_exists = false, user_world_exists = false; + + //Values used for Fenix_Process_detect_failures int dummy_recv_buffer; MPI_Request check_failures_req; - - MPI_Op agree_op; // This is reserved for the global agreement call for Fenix data recovery API - - - MPI_Errhandler mpi_errhandler; // This stores callback info for our custom error handler - int ignore_errs; // Set this to return errors instead of using the error handler normally. (Don't forget to unset!) - int print_unhandled; // Set this to print the error string for MPI errors of an unhandled return type. - - + MPI_Op agree_op; // Global agreement call for Fenix data recovery API + MPI_Errhandler mpi_errhandler; // Our custom error handler fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure } fenix_t; diff --git a/include/fenix_init.h b/include/fenix_init.h index c4ca69b..19471d1 100644 --- a/include/fenix_init.h +++ b/include/fenix_init.h @@ -67,7 +67,7 @@ extern "C" { int __fenix_preinit(int *, MPI_Comm, MPI_Comm *, int *, char ***, int, int, MPI_Info, int *, jmp_buf *); -void __fenix_postinit(int *); +void __fenix_postinit(); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/include/fenix_opt.hpp b/include/fenix_opt.hpp index b032b02..2fb34d4 100644 --- a/include/fenix_opt.hpp +++ b/include/fenix_opt.hpp @@ -78,7 +78,7 @@ do { printf("%s(): " fmt, __func__, __VA_ARGS__); } while (0) typedef struct __fenix_debug_opt_t { - int verbose; + int verbose = -1; } fenix_debug_opt_t; diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index f6ad346..760617f 100644 --- a/include/fenix_process_recovery.hpp +++ 
b/include/fenix_process_recovery.hpp @@ -65,13 +65,11 @@ #include #include #include +#include #include "fenix_init.h" #include -#define __FENIX_RESUME_AT_INIT 0 -#define __FENIX_RESUME_NO_JUMP 200 - using fenix_callback_func = std::function; typedef struct __fenix_comm_list_elm { @@ -85,6 +83,10 @@ typedef struct { fenix_comm_list_elm_t *tail; } fenix_comm_list_t; +void __fenix_set_resume_mode(const std::string_view& name); + +void __fenix_set_unhandled_mode(const std::string_view& name); + int __fenix_create_new_world(); int __fenix_repair_ranks(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5a8b7b0..0256344 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,6 @@ FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*) set (Fenix_SOURCES fenix.cpp -fenix_exception.cpp fenix_opt.cpp fenix_process_recovery.cpp fenix_util.cpp diff --git a/src/fenix_exception.cpp b/src/fenix_exception.cpp deleted file mode 100644 index 3ce629a..0000000 --- a/src/fenix_exception.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "fenix_exception.hpp" -#include "fenix.h" - -namespace Fenix { -int register_exception_callback(){ - return Fenix_Callback_register( - [](MPI_Comm repaired_comm, int fen_err){ - throw CommException(repaired_comm, fen_err); - } - ); -} -} diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index f785d15..51b7a31 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -67,13 +67,13 @@ #include +using namespace Fenix; + int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, int spare_ranks, int spawn, MPI_Info info, int *error, jmp_buf *jump_environment) { - - int ret; *role = fenix.role; *error = 0; @@ -85,79 +85,25 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha MPI_Comm_dup(comm, fenix.world); PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); - fenix.finalized = 0; fenix.spare_ranks = spare_ranks; 
fenix.spawn_policy = spawn; fenix.recover_environment = jump_environment; - fenix.role = FENIX_ROLE_INITIAL_RANK; - fenix.fail_world_size = 0; - fenix.ignore_errs = 0; - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - fenix.repair_result = 0; fenix.ret_role = role; fenix.ret_error = error; - fenix.options.verbose = -1; - // __fenix_init_opt(*argc, *argv); - - // For request tracking, make sure we can save at least an integer - // in MPI_Request - if(sizeof(MPI_Request) < sizeof(int)) { - fprintf(stderr, "FENIX ERROR: __fenix_preinit: sizeof(MPI_Request) < sizeof(int)!\n"); - MPI_Abort(comm, -1); - } - - MPI_Op_create((MPI_User_function *) __fenix_ranks_agree, 1, &fenix.agree_op); /* Check the values in info */ if (info != MPI_INFO_NULL) { char value[MPI_MAX_INFO_VAL + 1]; int vallen = MPI_MAX_INFO_VAL; - int flag; - - MPI_Info_get(info, "FENIX_RESUME_MODE", vallen, value, &flag); - if (flag == 1) { - if (strcmp(value, "Fenix_init") == 0) { - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - } else if (strcmp(value, "NO_JUMP") == 0) { - fenix.resume_mode = __FENIX_RESUME_NO_JUMP; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } + int found; - } else { - /* No support. 
Setting it to Fenix_init */ - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - } - } + MPI_Info_get(info, "FENIX_RESUME_MODE", vallen, value, &found); + if (found) __fenix_set_resume_mode(value); - - MPI_Info_get(info, "FENIX_UNHANDLED_MODE", vallen, value, &flag); - if (flag == 1) { - if (strcmp(value, "SILENT") == 0) { - fenix.print_unhandled = 0; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - } else if (strcmp(value, "NO_JUMP") == 0) { - fenix.print_unhandled = 1; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - - } else { - /* No support. Setting it to silent */ - fenix.print_unhandled = 0; - } - } + MPI_Info_get(info, "FENIX_UNHANDLED_MODE", vallen, value, &found); + if (found) __fenix_set_unhandled_mode(value); } if (fenix.spare_ranks >= __fenix_get_world_size(comm)) { @@ -175,13 +121,8 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha /* trigger an abort. 
*/ /*****************************************************/ - ret = 1; - while (ret) { - ret = __fenix_create_new_world(); - if (ret) { - // just_repair_process(); - } - } + //Try to create new_world until success + while (__fenix_create_new_world()); if ( __fenix_spare_rank() != 1) { fenix.num_inital_ranks = __fenix_get_world_size(fenix.new_world); @@ -201,17 +142,16 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha } } - fenix.num_survivor_ranks = 0; - fenix.num_recovered_ranks = 0; + fenix.fenix_init_flag = true; while ( __fenix_spare_rank() == 1) { int a; int myrank; MPI_Status mpi_status; - fenix.ignore_errs = 1; - ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, + fenix.ignore_errs = true; + int ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, &mpi_status); // listen for a failure - fenix.ignore_errs = 0; + fenix.ignore_errs = false; if (ret == MPI_SUCCESS) { if (fenix.options.verbose == 0) { verbose_print("Finalize the program; rank: %d, role: %d\n", @@ -232,11 +172,37 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha if(fenix.role != FENIX_ROLE_RECOVERED_RANK) MPI_Comm_dup(fenix.new_world, fenix.user_world); - fenix.user_world_exists = 1; + fenix.user_world_exists = true; return fenix.role; } +void __fenix_set_resume_mode(const std::string_view& name){ + if (name == "JUMP") { + fenix.resume_mode = Fenix_Resume_mode::JUMP; + } else if (name == "RETURN") { + fenix.resume_mode = Fenix_Resume_mode::RETURN; + } else if (name == "THROW") { + fenix.resume_mode = Fenix_Resume_mode::THROW; + } else { + fprintf(stderr, "Unsupported FENIX_RESUME_MODE %s\n", name.data()); + MPI_Abort(*fenix.world, 1); + } +} + +void __fenix_set_unhandled_mode(const std::string_view& name){ + if (name == "SILENT") { + fenix.resume_mode = Fenix_Unhandled_mode::SILENT; + } else if (name == "PRINT") { + fenix.resume_mode = Fenix_Unhandled_mode::PRINT; + } else if (name == 
"ABORT") { + fenix.resume_mode = Fenix_Unhandled_mode::ABORT; + } else { + fprintf(stderr, "Unsupported FENIX_UNHANDLED_MODE %s\n", name.data()); + MPI_Abort(*fenix.world, 1); + } +} + int __fenix_spare_rank_within(MPI_Comm refcomm) { int result = -1; @@ -676,13 +642,10 @@ int __fenix_spare_rank(){ return __fenix_spare_rank_within(*fenix.world); } -void __fenix_postinit(int *error) +void __fenix_postinit() { - - //if (fenix.options.verbose == 9) { - // verbose_print(" postinit: current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), - // fenix.role); - //} + *fenix.ret_role = fenix.role; + *fenix.ret_error = fenix.repair_result; if(fenix.new_world_exists){ //Set up dummy irecv to use for checking for failures. @@ -690,19 +653,8 @@ void __fenix_postinit(int *error) 34095347, fenix.new_world, &fenix.check_failures_req); } - if (fenix.repair_result != 0) { - *error = fenix.repair_result; - } - fenix.fenix_init_flag = 1; - -#if 0 - if (fenix.role != FENIX_ROLE_INITIAL_RANK) { - init_data_recovery(); - } -#endif - if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { - __fenix_callback_invoke_all(*error); + __fenix_callback_invoke_all(*fenix.ret_error); } if (fenix.options.verbose == 9) { verbose_print("After barrier. current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), @@ -742,7 +694,7 @@ void __fenix_finalize() int last_spare_rank = __fenix_get_world_size(*fenix.world) - 1; //If we've reached here, we will finalized regardless of further errors. - fenix.ignore_errs = 1; + fenix.ignore_errs = true; while(!fenix.finalized){ int user_rank = __fenix_get_current_rank(*fenix.user_world); @@ -765,7 +717,7 @@ void __fenix_finalize() } else { //If rank 0 did contribute, we know sends made it, and regardless //of any other failures we finalize. 
- fenix.finalized = 1; + fenix.finalized = true; } } @@ -794,7 +746,7 @@ void __fenix_finalize() void __fenix_finalize_spare() { - fenix.fenix_init_flag = 0; + fenix.fenix_init_flag = false; int unused; MPI_Request agree_req, recv_req = MPI_REQUEST_NULL; @@ -842,45 +794,55 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) } switch (ret) { - case MPI_ERR_PROC_FAILED_PENDING: - case MPI_ERR_PROC_FAILED: - MPIX_Comm_revoke(*fenix.world); - MPIX_Comm_revoke(fenix.new_world); - - if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); - - - fenix.repair_result = __fenix_repair_ranks(); - break; - case MPI_ERR_REVOKED: - fenix.repair_result = __fenix_repair_ranks(); - break; - case MPI_ERR_INTERN: - printf("Fenix detected error: MPI_ERR_INTERN\n"); - default: - if(fenix.print_unhandled){ + case MPI_ERR_PROC_FAILED_PENDING: + case MPI_ERR_PROC_FAILED: + MPIX_Comm_revoke(*fenix.world); + MPIX_Comm_revoke(fenix.new_world); + + if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); + + fenix.repair_result = __fenix_repair_ranks(); + break; + case MPI_ERR_REVOKED: + fenix.repair_result = __fenix_repair_ranks(); + break; + default: int len; char errstr[MPI_MAX_ERROR_STRING]; MPI_Error_string(ret, errstr, &len); - fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); - } - return; - break; + switch (fenix.unhandled_mode) { + case ABORT: + fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); + MPI_Abort(*fenix.world, 1); + break; + case PRINT: + fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); + break; + case SILENT: + break; + default: + printf("Fenix internal error: Unknown unhandled mode %d\n", fenix.unhandled_mode); + assert(false); + break; + } + return; + break; } - fenix.role = FENIX_ROLE_SURVIVOR_RANK; + __fenix_postinit(); if(!fenix.finalized) { switch(fenix.resume_mode) { - case __FENIX_RESUME_AT_INIT: + case JUMP: longjmp(*fenix.recover_environment, 1); break; - case __FENIX_RESUME_NO_JUMP: - *(fenix.ret_role) = FENIX_ROLE_SURVIVOR_RANK; - 
__fenix_postinit(fenix.ret_error); + case RETURN: + break; + case THROW: + throw CommException(*fenix.user_world, *fenix.ret_error); break; default: - printf("Fenix detected error: Unknown resume mode\n"); + printf("Fenix internal error: Unknown resume mode %d\n", fenix.resume_mode); assert(false); break; } diff --git a/test/exception_throw/fenix_exceptions.cpp b/test/exception_throw/fenix_exceptions.cpp index 92fc9a0..11d4019 100644 --- a/test/exception_throw/fenix_exceptions.cpp +++ b/test/exception_throw/fenix_exceptions.cpp @@ -72,12 +72,9 @@ int main(int argc, char **argv) { MPI_Comm res_comm; MPI_Info info; MPI_Info_create(&info); - MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); - MPI_Info_set(info, "FENIX_UNHANDLED_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_RESUME_MODE", "THROW"); Fenix_Init(&fenix_role, MPI_COMM_WORLD, &res_comm, &argc, &argv, 0, 0, info, &error); - Fenix::register_exception_callback(); - if(fenix_role == FENIX_ROLE_SURVIVOR_RANK){ printf("FAILURE: longjmp instead of exception\n"); status = 1; diff --git a/test/issend/fenix_issend_test.c b/test/issend/fenix_issend_test.c index 212a7ae..23f3d85 100644 --- a/test/issend/fenix_issend_test.c +++ b/test/issend/fenix_issend_test.c @@ -87,7 +87,7 @@ int main(int argc, char **argv) { MPI_Info info; MPI_Info_create(&info); - MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_RESUME_MODE", "RETURN"); int fenix_status; int recovered = 0; diff --git a/test/no_jump/fenix_no_jump_test.c b/test/no_jump/fenix_no_jump_test.c index 31eb9f3..cf5d261 100644 --- a/test/no_jump/fenix_no_jump_test.c +++ b/test/no_jump/fenix_no_jump_test.c @@ -87,7 +87,7 @@ int main(int argc, char **argv) { MPI_Info info; MPI_Info_create(&info); - MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_RESUME_MODE", "RETURN"); int fenix_status; int recovered = 0; From 8051e093f3807b8a817596e49da0c226d1df1cdb Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Wed, 4 
Jun 2025 16:05:29 -0500 Subject: [PATCH 07/21] Update data subsets and policies to c++, implement storev and resizeable members --- examples/01_hello_world/fenix/CMakeLists.txt | 1 + .../01_hello_world/fenix/fenix_hello_world.c | 1 + examples/02_send_recv/fenix/CMakeLists.txt | 7 +- examples/05_subset_create/subset_create.c | 36 +- examples/07_resizeable_member/CMakeLists.txt | 21 + examples/07_resizeable_member/resizeable.cpp | 203 ++ examples/CMakeLists.txt | 1 + include/fenix.h | 45 +- include/fenix.hpp | 62 + include/fenix_data_buffer.hpp | 124 ++ include/fenix_data_group.hpp | 105 +- include/fenix_data_member.hpp | 24 +- include/fenix_data_policy.hpp | 4 + include/fenix_data_policy_in_memory_raid.hpp | 171 +- include/fenix_data_recovery.hpp | 29 +- include/fenix_data_subset.h | 49 +- include/fenix_data_subset.hpp | 227 +++ include/fenix_ext.hpp | 6 +- include/fenix_opt.hpp | 61 +- include/fenix_process_recovery.hpp | 7 +- src/CMakeLists.txt | 32 +- src/fenix.cpp | 119 +- src/fenix_data_group.cpp | 30 +- src/fenix_data_member.cpp | 21 +- src/fenix_data_policy.cpp | 9 +- src/fenix_data_policy_in_memory_raid.cpp | 1726 +++++++---------- src/fenix_data_recovery.cpp | 127 +- src/fenix_data_subset.cpp | 1323 ++++++------- src/fenix_process_recovery.cpp | 110 +- test/CMakeLists.txt | 4 +- .../{subset_merging => storev}/CMakeLists.txt | 7 +- test/storev/storev.cpp | 201 ++ test/subset/CMakeLists.txt | 24 + .../subset_addition.cpp} | 131 +- test/subset/subset_common.hpp | 79 + test/subset/subset_copy.cpp | 113 ++ test/subset/subset_includes.cpp | 166 ++ test/subset/subset_subtraction.cpp | 119 ++ test/subset_internal/CMakeLists.txt | 13 - .../fenix_subset_merging_test.c | 176 -- 40 files changed, 3411 insertions(+), 2303 deletions(-) create mode 100644 examples/07_resizeable_member/CMakeLists.txt create mode 100644 examples/07_resizeable_member/resizeable.cpp create mode 100644 include/fenix_data_buffer.hpp create mode 100644 include/fenix_data_subset.hpp rename 
test/{subset_merging => storev}/CMakeLists.txt (63%) create mode 100644 test/storev/storev.cpp create mode 100644 test/subset/CMakeLists.txt rename test/{subset_internal/fenix_subset_internal_test.c => subset/subset_addition.cpp} (51%) create mode 100644 test/subset/subset_common.hpp create mode 100644 test/subset/subset_copy.cpp create mode 100644 test/subset/subset_includes.cpp create mode 100644 test/subset/subset_subtraction.cpp delete mode 100644 test/subset_internal/CMakeLists.txt delete mode 100644 test/subset_merging/fenix_subset_merging_test.c diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 6a344f4..b22ccb9 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -10,6 +10,7 @@ add_executable(fenix_hello_world fenix_hello_world.c) target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES}) +set_target_properties(fenix_hello_world PROPERTIES LINKER_LANGUAGE C) if(BUILD_TESTING) add_test(NAME hello_world diff --git a/examples/01_hello_world/fenix/fenix_hello_world.c b/examples/01_hello_world/fenix/fenix_hello_world.c index 9008ee7..6509a0b 100644 --- a/examples/01_hello_world/fenix/fenix_hello_world.c +++ b/examples/01_hello_world/fenix/fenix_hello_world.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index bf40679..e88d944 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -9,11 +9,12 @@ # add_executable(fenix_ring fenix_ring.c) -target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m ) +target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES}) +set_target_properties(fenix_ring PROPERTIES LINKER_LANGUAGE C) if(BUILD_TESTING) - add_test(NAME ring + add_test(NAME send_recv COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} 
fenix_ring ${MPIEXEC_POSTFLAGS} 1 2) - set_tests_properties(ring PROPERTIES + set_tests_properties(send_recv PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/05_subset_create/subset_create.c b/examples/05_subset_create/subset_create.c index c819318..23c15ca 100644 --- a/examples/05_subset_create/subset_create.c +++ b/examples/05_subset_create/subset_create.c @@ -66,11 +66,12 @@ int max_iter = 2; const int kCount = 100; const int kKillID = 2; +const int my_group = 0; +const int my_member = 0; int main(int argc, char **argv) { -fprintf(stderr, "Started\n"); int i; - int subset[500]; + int subset[kCount]; MPI_Status status; if (argc < 2) { @@ -86,7 +87,6 @@ fprintf(stderr, "Started\n"); int num_ranks; int rank; int error; - int my_group = 0; int my_timestamp = 0; int my_depth = 1; int recovered = 0; @@ -120,24 +120,33 @@ fprintf(stderr, "Started\n"); if (fenix_role == FENIX_ROLE_INITIAL_RANK) { // init my subset data - int index; - for (index = 0; index < kCount; index++) { + for (int index = 0; index < kCount; index++) { subset[index] = -1; } - Fenix_Data_member_create(my_group, 777, subset, kCount, MPI_INT); + Fenix_Data_member_create(my_group, my_member, subset, kCount, MPI_INT); //Store the entire data set for the initial commit. This is not a requirement. - Fenix_Data_member_store(my_group, 777, FENIX_DATA_SUBSET_FULL); + Fenix_Data_member_store(my_group, my_member, FENIX_DATA_SUBSET_FULL); Fenix_Data_commit_barrier(my_group, NULL); } else { //We've had a failure! Time to recover data. 
- fprintf(stderr, "Starting data recovery on node %d\n", rank); - Fenix_Data_member_restore(my_group, 777, subset, kCount, FENIX_TIME_STAMP_MAX, NULL); + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + + //Set all data to a value that was never stored + for (int index = 0; index < kCount; index++) { + subset[index] = -2; + } + + int restore_ret = Fenix_Data_member_restore(my_group, my_member, subset, kCount, FENIX_TIME_STAMP_MAX, NULL); + + if(restore_ret != FENIX_SUCCESS){ + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, restore_ret); + } int out_flag; - Fenix_Data_member_attr_set(my_group, 777, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, + Fenix_Data_member_attr_set(my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, subset, &out_flag); @@ -159,12 +168,11 @@ fprintf(stderr, "Started\n"); //We'll store only the small subset that we specified, though. //This means that as far as Fenix is concerned only data within that //subset was ever changed from the initialized value of -1 - Fenix_Data_member_store(my_group, 777, subset_specifier); + Fenix_Data_member_store(my_group, my_member, subset_specifier); Fenix_Data_commit_barrier(my_group, NULL); MPI_Barrier(new_comm); //Make sure everyone is done committing before we kill and restart everyone //else we may end up with only some nodes having the commit, and it being unusable - } @@ -172,7 +180,7 @@ fprintf(stderr, "Started\n"); if (rank == kKillID && recovered == 0) { fprintf(stderr, "Doing kill on node %d\n", rank); pid_t pid = getpid(); - kill(pid, SIGTERM); + kill(pid, SIGKILL); } //Make sure we've let rank 2 fail before proceeding, so we're definitely checking @@ -214,6 +222,6 @@ fprintf(stderr, "Started\n"); Fenix_Finalize(); - MPI_Finalize(); + //MPI_Finalize(); return !successful; //return error status } diff --git a/examples/07_resizeable_member/CMakeLists.txt b/examples/07_resizeable_member/CMakeLists.txt new file mode 100644 index 0000000..accefdd --- /dev/null +++ 
b/examples/07_resizeable_member/CMakeLists.txt @@ -0,0 +1,21 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +add_executable(resizeable resizeable.cpp) +target_link_libraries(resizeable fenix ${MPI_C_LIBRARIES}) + +target_compile_features(resizeable PRIVATE cxx_std_20) + +if(BUILD_TESTING) + add_test(NAME resizeable + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} resizeable ${MPIEXEC_POSTFLAGS} 1) + set_tests_properties(resizeable PROPERTIES + FAIL_REGULAR_EXPRESSION "FAILURE" LABELS "Example") +endif() diff --git a/examples/07_resizeable_member/resizeable.cpp b/examples/07_resizeable_member/resizeable.cpp new file mode 100644 index 0000000..2ebe789 --- /dev/null +++ b/examples/07_resizeable_member/resizeable.cpp @@ -0,0 +1,203 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr int kKillID = 2; +constexpr int my_group = 0; +constexpr int my_member = 0; +constexpr int start_timestamp = 0; +constexpr int group_depth = 1; +int errflag; + +using Fenix::DataSubset; +using namespace Fenix::Data; + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + + MPI_Comm res_comm; + Fenix::init({.out_comm = &res_comm, .spares = 1}); + + int num_ranks, rank; + MPI_Comm_size(res_comm, &num_ranks); + MPI_Comm_rank(res_comm, &rank); + + std::vector data; + + bool should_throw = Fenix_get_role() == FENIX_ROLE_RECOVERED_RANK; + while(true) try { + if(should_throw){ + should_throw = false; + Fenix::throw_exception(); + } + + //Initial work and commits + if(Fenix_get_role() == FENIX_ROLE_INITIAL_RANK){ + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + Fenix_Data_member_create( + my_group, my_member, data.data(), FENIX_RESIZEABLE, MPI_INT + ); + + data.resize(100); + for(int& i : data) i = -1; + + + //Store the whole array first. We need to keep our buffer pointer updated + //since resizing an array can change it + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_store(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + //Now commit a smaller portion with different data. 
+ data.resize(50); + int val = 1; + for(int& i : data) i = val++; + + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_store(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + if(rank == kKillID){ + fprintf(stderr, "Doing kill on node %d\n", rank); + raise(SIGTERM); + } + } + + Fenix_Finalize(); + + + break; + } catch (const Fenix::CommException& e) { + const Fenix::CommException* err = &e; + while(true) try { + //We've had a failure! Time to recover data. + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + if(err->fenix_err != FENIX_SUCCESS){ + fprintf(stderr, "FAILURE on Fenix Init (%d). Exiting.\n", err->fenix_err); + exit(1); + } + + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + + //Do a null restore to get information about the stored subset + DataSubset stored_subset; + int ret = member_restore( + my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + ); + if(ret != FENIX_SUCCESS) { + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + //Resize data to fit all stored data + data.resize(stored_subset.end()+1); + + //Set all data to a value that was never stored, just for testing + for(int& i : data) i = -2; + + //Now do an lrestore to get the recovered data. 
+ ret = member_lrestore( + my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + stored_subset + ); + + break; + } catch (const Fenix::CommException& nested){ + err = &nested; + } + } + + //Ensure data is correct after execution and recovery + bool successful = data.size() == 50; + if(!successful) printf("Rank %d expected data size 50, but got %d\n", rank, data.size()); + + for(int i = 0; i < data.size() && successful; i++){ + successful &= data[i] == i+1; + if(!successful) printf("Rank %d data[%d]=%d, but should be %d!\n", rank, i, data[i], i+1); + } + + if(successful){ + printf("Rank %d successfully recovered\n", rank); + } else { + printf("FAILURE on rank %d\n", rank); + } + + MPI_Finalize(); + return !successful; //return error status +} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b1f7321..faa8411 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(03_reduce/fenix) add_subdirectory(04_Isend_Irecv/fenix) add_subdirectory(05_subset_create) add_subdirectory(06_subset_createv) +add_subdirectory(07_resizeable_member) diff --git a/include/fenix.h b/include/fenix.h index 67be4b5..8c042fc 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -61,13 +61,9 @@ #include #if defined(c_plusplus) || defined(__cplusplus) -#include "fenix.hpp" - extern "C" { #endif - -#include "fenix_data_subset.h" #include "fenix_init.h" /** @@ -116,8 +112,6 @@ extern "C" { //!@internal @brief Agreement code for data commit barrier #define FENIX_DATA_COMMIT_BARRIER_LOC 4 - - /** * @defgroup ProcessRecovery Process Recovery * @brief Functions for managing process recovery in Fenix. 
@@ -298,7 +292,13 @@ int Fenix_Process_detect_failures(int do_recovery); int Fenix_get_number_of_ranks_with_role(int, int *); //!@unimplemented Returns the #Fenix_Rank_role for a given rank -int Fenix_get_role(MPI_Comm comm, int rank, int *role); +int Fenix_get_rank_role(MPI_Comm comm, int rank, int *role); + +//!@brief Returns this rank's #Fenix_Rank_role +Fenix_Rank_role Fenix_get_role(); + +//!@brief Returns the error value from Fenix_Init or the latest recovery +int Fenix_get_error(); /** * @brief Get the list of ranks that failed in the most recent failure. @@ -357,9 +357,11 @@ int Fenix_Finalize(); #define FENIX_DATA_MEMBER_ATTRIBUTE_SIZE 14 #define FENIX_DATA_SNAPSHOT_LATEST -1 #define FENIX_DATA_SNAPSHOT_ALL 16 +#define FENIX_RESIZEABLE 0 #define FENIX_DATA_SUBSET_CREATED 2 #define FENIX_DATA_POLICY_IN_MEMORY_RAID 13 +#define FENIX_DATA_POLICY_IMR FENIX_DATA_POLICY_IN_MEMORY_RAID /** * @unimplemented As MPI_Request, but for Fenix asynchronous data recovery calls @@ -369,6 +371,22 @@ typedef struct { MPI_Request mpi_recv_req; } Fenix_Request; + +/** + * @brief Represents a data subset that can be stored/recovered + * + * Must be initialized (via #Fenix_Data_subset_create or + * #Fenix_Data_subset_createv) before using as an input parameter. + * + * Must be uninitialized or freed (#Fenix_Data_subset_free) before using as an + * output parameter to avoid data leaks. + */ +typedef struct { + //!@internal @brief pointer to a Fenix::DataSubset object + void* impl; +} Fenix_Data_subset; + + //!@brief A standin for checkpointing/recovering all available data in a member. extern const Fenix_Data_subset FENIX_DATA_SUBSET_FULL; @@ -428,7 +446,7 @@ int Fenix_Data_group_create(int group_id, MPI_Comm comm, int start_time_stamp, * is critical for non-survivor ranks after a failure which will have an invalid address * which was generated on the failed rank and must update. 
* @param count The maximum number of contiguous elements of type \c datatype of the data to be - * stored. Need not be the same in all calling ranks. + * stored. A value of FENIX_RESIZEABLE allows this member to have a varying data size. * @param datatype The MPI_Datatype of the elements in \c source_buffer * * @return FENIX_SUCCESS, or an error value. @@ -468,24 +486,25 @@ int Fenix_Data_test(Fenix_Request request, int *flag); * @param member_id All ranks must provide the same member_id * @param subset_specifier Which subset of the data to store. It is always valid for every rank to provide the same * subset_specifier; depending on the group's policy, varying combinations of specifiers may be possible. + * If this member was created with size FENIX_RESIZEABLE, FENIX_DATA_SUBSET_FULL is an invalid input. * @return FENIX_SUCCESS, or an error value. */ int Fenix_Data_member_store(int group_id, int member_id, - Fenix_Data_subset subset_specifier); + const Fenix_Data_subset subset_specifier); //!@unimplemented As [store](#Fenix_Data_member_store), but subsets may vary rank-to-rank. int Fenix_Data_member_storev(int group_id, int member_id, - Fenix_Data_subset subset_specifier); + const Fenix_Data_subset subset_specifier); //!@unimplemented As [store](#Fenix_Data_member_store), but asynchronous. int Fenix_Data_member_istore(int group_id, int member_id, - Fenix_Data_subset subset_specifier, + const Fenix_Data_subset subset_specifier, Fenix_Request *request); //!@unimplemented As [istore](#Fenix_Data_member_istore), but subsets may vary rank-to-rank.
int Fenix_Data_member_istorev(int group_id, int member_id, - Fenix_Data_subset subset_specifier, + const Fenix_Data_subset subset_specifier, Fenix_Request *request); /** @@ -733,6 +752,8 @@ int Fenix_Data_member_delete(int group_id, int member_id); #if defined(c_plusplus) || defined(__cplusplus) } + +#include "fenix.hpp" #endif #endif // __FENIX__ diff --git a/include/fenix.hpp b/include/fenix.hpp index 0234fe3..af6d1b2 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -62,6 +62,7 @@ #include #include "fenix.h" #include "fenix_exception.hpp" +#include "fenix_data_subset.hpp" /** * @brief As the C-style callback, but accepts an std::function and does not use the void* pointer. @@ -72,4 +73,65 @@ */ int Fenix_Callback_register(std::function callback); +namespace Fenix { + +namespace Args { +struct FenixInitArgs { + int* role = nullptr; + MPI_Comm in_comm = MPI_COMM_WORLD; + MPI_Comm* out_comm = nullptr; + int* argc = nullptr; + char*** argv = nullptr; + int spares = 0; + int spawn = 0; + Fenix_Resume_mode resume_mode = THROW; + Fenix_Unhandled_mode unhandled_mode = ABORT; + int* err = nullptr; +}; +} + +void init(const Args::FenixInitArgs args); + +//!@brief Throw an exception for the most recent fault. Helpful for spares. 
+void throw_exception(); + +} // namespace Fenix + +namespace Fenix::Data { + +extern const DataSubset FENIX_SUBSET_FULL; +extern const DataSubset FENIX_SUBSET_EMPTY; + +//!@brief Overload of #Fenix_Data_member_store +int member_store(int group_id, int member_id, const DataSubset& subset); + +//!@brief Overload of #Fenix_Data_member_storev +int member_storev(int group_id, int member_id, const DataSubset& subset); + +//!@brief Overload of #Fenix_Data_member_istore +int member_istore( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +); + +//!@brief Overload of #Fenix_Data_member_istorev +int member_istorev( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +); + +//!@brief Overload of #Fenix_Data_member_restore +int member_restore( + int group_id, int member_id, void *target_buffer, int max_count, + int time_stamp, DataSubset& data_found +); + +//!@brief Overload of #Fenix_Data_member_lrestore +int member_lrestore( + int group_id, int member_id, void *target_buffer, int max_count, + int time_stamp, DataSubset& data_found +); + +} // namespace Fenix::Data + #endif diff --git a/include/fenix_data_buffer.hpp b/include/fenix_data_buffer.hpp new file mode 100644 index 0000000..c56de2f --- /dev/null +++ b/include/fenix_data_buffer.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef FENIX_DATA_BUFFER_HPP +#define FENIX_DATA_BUFFER_HPP + +#include +#include + +namespace Fenix { +namespace Detail { + +template +struct UninitializedCharAllocator : public std::allocator{ + UninitializedCharAllocator() noexcept {}; + template + UninitializedCharAllocator(const U& other) noexcept {}; + + using value_type = T; + void construct(char*){ }; + + template + struct rebind { + using other = UninitializedCharAllocator; + }; +}; + +using BufferVec = std::vector>; + +} + +class DataBuffer : public Detail::BufferVec { + public: + using BufferVec = Detail::BufferVec; + + void reset(size_t new_size = 0){ + //Clear first, to be sure any re-allocations don't actually move data + clear(); + resize(new_size); + } + + int send(int dst, int tag, MPI_Comm comm){ + return MPI_Send(data(), size(), MPI_BYTE, dst, tag, comm); + } + + //Recv n bytes + int recv( + int n, int src, int tag, MPI_Comm comm, + MPI_Status* status = MPI_STATUS_IGNORE + ) { + reset(n); + return MPI_Recv(data(), n, MPI_BYTE, src, tag, comm, status); + } + + //Recv an unknown amount of data and resize to fit + int recv_unknown( + int src, int tag, MPI_Comm comm, MPI_Status* status = MPI_STATUS_IGNORE + ) { + MPI_Status p_status; + MPI_Probe(src, tag, comm, &p_status); + + int n; + MPI_Get_count(&p_status, MPI_BYTE, &n); + return recv(n, src, tag, comm); + } + +}; + +} + +#endif //FENIX_DATA_BUFFER_HPP diff --git a/include/fenix_data_group.hpp b/include/fenix_data_group.hpp index 9add516..0c2f258 100644 --- a/include/fenix_data_group.hpp +++ b/include/fenix_data_group.hpp @@ -56,90 +56,24 @@ #ifndef __FENIX_DATA_GROUP_H__ #define __FENIX_DATA_GROUP_H__ -#include +#include #include #include "fenix.h" -#include "fenix_ext.hpp" #include "fenix_data_member.hpp" #include "fenix_data_packet.hpp" #include 
"fenix_util.hpp" -#include "fenix_data_subset.h" +#include "fenix_data_subset.hpp" #define __FENIX_DEFAULT_GROUP_SIZE 32 -typedef struct __fenix_group_vtbl fenix_group_vtbl_t; -typedef struct __fenix_group fenix_group_t; - - namespace Fenix::Data { using member_iterator = std::pair; -} //end namespace Fenix::Data - - -//This defines the functions which must be implemented by the group -typedef struct __fenix_group_vtbl { - int (*group_delete)(fenix_group_t* group); - - int (*member_create)(fenix_group_t* group, fenix_member_entry_t* mentry); - - int (*member_delete)(fenix_group_t* group, int member_id); - - int (*get_redundant_policy)(fenix_group_t*, int* policy_name, - void* policy_value, int* flag); - - int (*member_store)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); - - int (*member_storev)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); - - int (*member_istore)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); - - int (*member_istorev)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); - - int (*commit)(fenix_group_t* group); - - int (*snapshot_delete)(fenix_group_t* group, int time_stamp); - - int (*barrier)(fenix_group_t* group); - - int (*member_restore)(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); - - int (*member_lrestore)(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); - - int (*member_restore_from_rank)(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - int source_rank); - - int (*get_number_of_snapshots)(fenix_group_t* group, - int* number_of_snapshots); - - int (*get_snapshot_at_position)(fenix_group_t* group, int position, - int* time_stamp); - - int (*reinit)(fenix_group_t* group, int* flag); - - int 
(*member_get_attribute)(fenix_group_t* group, fenix_member_entry_t* mentry, - int attributename, void* attributevalue, int* flag, int sourcerank); - - int (*member_set_attribute)(fenix_group_t* group, fenix_member_entry_t* mentry, - int attributename, void* attributevalue, int* flag); - -} fenix_group_vtbl_t; - //We keep basic bookkeeping info here, policy specific //information is kept by the policy's data type. -typedef struct __fenix_group { - fenix_group_vtbl_t vtbl; +struct fenix_group_t { int groupid; MPI_Comm comm; int comm_size; @@ -148,18 +82,38 @@ typedef struct __fenix_group { int timestamp; int depth; int policy_name; - std::vector members; + std::map members; //Search for id, returning {-1, nullptr} if not found. Fenix::Data::member_iterator search_member(int id); - //As search_member, but print an error message is id not found. + //As search_member, but print an error message if id not found. Fenix::Data::member_iterator find_member(int id); -} fenix_group_t; + + virtual int group_delete() = 0; + virtual int member_create(fenix_member_entry_t* member) = 0; + virtual int member_delete(int memberid) = 0; + virtual int get_redundant_policy(int* name, void* value, int* flag) = 0; + virtual int member_store(int memberid, const DataSubset& subset) = 0; + virtual int member_storev(int memberid, const DataSubset& subset) = 0; + virtual int member_istore(int memberid, const DataSubset& subset, Fenix_Request* req) = 0; + virtual int member_istorev(int memberid, const DataSubset& subset, Fenix_Request* req) = 0; + virtual int commit() = 0; + virtual int snapshot_delete(int timestamp) = 0; + virtual int barrier() = 0; + virtual int member_restore(int member_id, void* target_bugger, int max, int timestamp, DataSubset& data_found) = 0; + virtual int member_lrestore(int member_id, void* target_bugger, int max, int timestamp, DataSubset& data_found) = 0; + virtual int member_restore_from_rank(int member_id, void* target_bugger, int max, int timestamp, int 
source_rank) = 0; + virtual int get_number_of_snapshots(int* num) = 0; + virtual int get_snapshot_at_position(int position, int* timestamp) = 0; + virtual int reinit(int* flag) = 0; + virtual int member_get_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag, int sourcerank) = 0; + virtual int member_set_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag) = 0; +}; typedef struct __fenix_data_recovery { size_t count; size_t total_size; - fenix_group_t **group; + Fenix::Data::fenix_group_t **group; } fenix_data_recovery_t; typedef struct __group_entry_packet { @@ -185,11 +139,10 @@ int __fenix_search_groupid( int key, fenix_data_recovery_t *dr); int __fenix_find_next_group_position( fenix_data_recovery_t *dr ); -namespace Fenix::Data { - using group_iterator = std::pair; -group_iterator find_group(int id, fenix_data_recovery_t *dr = fenix.data_recovery); +group_iterator find_group(int id); +group_iterator find_group(int id, fenix_data_recovery_t *dr); } //end namespace Fenix::Data diff --git a/include/fenix_data_member.hpp b/include/fenix_data_member.hpp index 94ef32b..3941060 100644 --- a/include/fenix_data_member.hpp +++ b/include/fenix_data_member.hpp @@ -62,22 +62,27 @@ #define __FENIX_DEFAULT_MEMBER_SIZE 512 +namespace Fenix::Data { -typedef struct __fenix_group fenix_group_t; +struct fenix_group_t; -typedef struct __fenix_member_entry { - int memberid = -1; - enum states state; - void *user_data = nullptr; +struct fenix_member_entry_packet_t { + int memberid; int datatype_size; int current_count; -} fenix_member_entry_t; +}; -typedef struct __member_entry_packet { - int memberid; +struct fenix_member_entry_t { + fenix_member_entry_t() = default; + + fenix_member_entry_packet_t to_packet(); + + int memberid = -1; + enum states state; + char *user_data = nullptr; int datatype_size; int current_count; -} fenix_member_entry_packet_t; +}; fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* group, int 
memberid, void* data, int count, int datatype_size); @@ -91,4 +96,5 @@ int __fenix_search_memberid(fenix_group_t* group, int memberid); void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, enum states mystatus); +} #endif // FENIX_DATA_MEMBER_H diff --git a/include/fenix_data_policy.hpp b/include/fenix_data_policy.hpp index 58bc1a2..18b2d23 100644 --- a/include/fenix_data_policy.hpp +++ b/include/fenix_data_policy.hpp @@ -61,7 +61,11 @@ #include "fenix.h" #include "fenix_data_group.hpp" +namespace Fenix::Data { + int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag); +} // namespace Fenix::Data + #endif //__FENIX_DATA_POLICY_H__ diff --git a/include/fenix_data_policy_in_memory_raid.hpp b/include/fenix_data_policy_in_memory_raid.hpp index e420c79..8063cbf 100644 --- a/include/fenix_data_policy_in_memory_raid.hpp +++ b/include/fenix_data_policy_in_memory_raid.hpp @@ -58,9 +58,178 @@ #define __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ #include +#include +#include +#include +#include +#include #include "fenix_data_group.hpp" +#include "fenix_data_buffer.hpp" +#include "fenix_data_subset.hpp" -void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, +namespace Fenix::Data::IMR { + +void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, void* policy_value, int* flag); +struct Entry { + //No copying, must be moved + Entry(const Entry&) = delete; + Entry(Entry&&); + Entry& operator=(Entry&&); + + Entry(int size, int max_count); + + //Re-initializes + void reset(); + + //Get raw buffer pointer + char* data(); + //Get buffer size + int size(); + //Resize buffer + void resize(int size); + //Add subset to region and ensure buf is large enough. 
+ void add_and_fit(const DataSubset& subset); + + DataBuffer buf; + DataSubset region; + + char* partner_data(); + int partner_size(); + void partner_resize(int size); + void partner_add_and_fit(const DataSubset& subset); + + DataBuffer partner_buf; + DataSubset partner_region; + + int timestamp = -2; + int elm_size; + int elm_max_count; +}; + +struct Group; + +struct Member { + Member(fenix_member_entry_t& mentry, Group& group); + + //Returns true if snapshot was found. + bool snapshot_delete(int timestamp); + + //Member::store(v) copies local data and region. + int store(const DataSubset& subset); + //Handles partner data and region + virtual int store_impl(const DataSubset& subset) = 0; + + //As store(_impl) + int storev(const DataSubset& subset); + virtual int storev_impl(const DataSubset& subset) = 0; + + //Restore all internal snapshot data + //Moves entries to align with the group's list of timestamps. + //Impl must actually restoring entry data + int restore(); + virtual int restore_impl() = 0; + + int lrestore(char* target, int max, int timestamp, DataSubset& subset); + + void commit(int timestamp); + + fenix_member_entry_t& mentry; + Group& group; + int id = mentry.memberid; + // entries to be initialized by inheritors + std::deque entries; + + DataBuffer& send_buf; + DataBuffer& recv_buf; +}; + +struct BuddyMember : public Member { + BuddyMember(fenix_member_entry_t& mentry, Group& group); + int restore_impl() override; + int store_impl(const DataSubset& subset) override; + int storev_impl(const DataSubset& subset) override; + int exch(const DataSubset& subset, const DataSubset& partner_subset); +}; + +struct ParityMember : public Member { + ParityMember(fenix_member_entry_t& mentry, Group& group); + int restore_impl() override; + int store_impl(const DataSubset& subset) override; + + int storev_impl(const DataSubset& subset) override { + fatal_print("IMR mode 5 cannot storev"); + return 0; + }; +}; + +struct Group : public fenix_group_t { + 
Group(MPI_Comm comm, int timestart, int depth, int* policy, int* flag); + + int mode; + int rank_separation; + std::vector partners; + + MPI_Comm set_comm = MPI_COMM_NULL; + int set_size, set_rank; + + std::map> member_data; + std::deque timestamps; + + DataBuffer send_buf, recv_buf; + + void sync_timestamps(); + void build_set_comm(); + + //nullptr if member not found + Member* find_member(int member_id); + + std::string str(); + + int group_delete() override; + int member_create(fenix_member_entry_t* mentry) override; + int member_delete(int member_id) override; + int get_redundant_policy(int* name, void* value, int* flag) override; + + int member_store(int member_id, const DataSubset& subset) override; + int member_storev(int member_id, const DataSubset& subset) override; + int member_istore( + int member_id, const DataSubset& subset, Fenix_Request *request + ) override; + int member_istorev( + int member_id, const DataSubset& subset, Fenix_Request *request + ) override; + + int commit() override; + + int snapshot_delete(int timestamp) override; + int barrier() override; + + int member_restore( + int member_id, void* buffer, int max, int timestamp, DataSubset& data_found + ) override; + int member_lrestore( + int member_id, void* buffer, int max, int timestamp, DataSubset& data_found + ) override; + int member_restore_from_rank( + int member_id, void* buffer, int max, int timestamp, int source_rank + ) override; + + int member_get_attribute( + fenix_member_entry_t* member, int name, void* value, int* flag, + int sourcerank + ) override; + int member_set_attribute( + fenix_member_entry_t* member, int name, void* value, int* flag + ) override; + + int get_number_of_snapshots(int* number_of_snapshots) override; + int get_snapshot_at_position(int position, int* timestamp) override; + + int reinit(int* flag) override; +}; + +} + #endif //__FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ diff --git a/include/fenix_data_recovery.hpp b/include/fenix_data_recovery.hpp index 
717bb13..8610aad 100644 --- a/include/fenix_data_recovery.hpp +++ b/include/fenix_data_recovery.hpp @@ -74,11 +74,6 @@ #define __NUM_MEMBER_ATTR_SIZE 3 #define __GRP_MEMBER_LENTRY_ATTR_SIZE 11 - - - - - #define STORE_RANK_TAG 2000 #define STORE_COUNT_TAG 2001 #define STORE_SIZE_TAG 2002 @@ -95,10 +90,7 @@ #define RECOVER_DATA_TAG 1907 - - - - +namespace Fenix::Data { typedef struct __data_entry_packet { int count; @@ -111,15 +103,15 @@ int __fenix_group_get_redundancy_policy(int, int*, int*, int*); int __fenix_member_create(int, int, void *, int, int); int __fenix_data_wait(Fenix_Request); int __fenix_data_test(Fenix_Request, int *); -int __fenix_member_store(int, int, Fenix_Data_subset); -int __fenix_member_storev(int, int, Fenix_Data_subset); -int __fenix_member_istore(int, int, Fenix_Data_subset, Fenix_Request *); -int __fenix_member_istorev(int, int, Fenix_Data_subset, Fenix_Request *); +int __fenix_member_store(int, int, const DataSubset&); +int __fenix_member_storev(int, int, const DataSubset&); +int __fenix_member_istore(int, int, const DataSubset&, Fenix_Request *); +int __fenix_member_istorev(int, int, const DataSubset&, Fenix_Request *); int __fenix_data_commit(int, int *); int __fenix_data_commit_barrier(int, int *); int __fenix_data_barrier(int); -int __fenix_member_restore(int, int, void *, int, int, Fenix_Data_subset*); -int __fenix_member_lrestore(int, int, void *, int, int, Fenix_Data_subset*); +int __fenix_member_restore(int, int, void *, int, int, DataSubset&); +int __fenix_member_lrestore(int, int, void *, int, int, DataSubset&); int __fenix_member_restore_from_rank(int, int, void *, int, int, int); int __fenix_get_number_of_members(int, int *); int __fenix_get_member_at_position(int, int *, int); @@ -135,11 +127,6 @@ int __fenix_member_delete(int, int); void __fenix_init_data_recovery(); void __fenix_init_partner_copy_recovery(); - -void __fenix_dr_print_store(); -void __fenix_dr_print_restore(); -void __fenix_dr_print_datastructure(); -void 
__fenix_store_single(); -void __fenix_store_all(); +} #endif diff --git a/include/fenix_data_subset.h b/include/fenix_data_subset.h index abb2587..0f08602 100644 --- a/include/fenix_data_subset.h +++ b/include/fenix_data_subset.h @@ -57,45 +57,34 @@ #define __FENIX_DATA_SUBSET_H__ #include -#define __FENIX_SUBSET_EMPTY 1 -#define __FENIX_SUBSET_FULL 2 -#define __FENIX_SUBSET_CREATE 3 -#define __FENIX_SUBSET_CREATEV 4 -#define __FENIX_SUBSET_UNDEFINED -1 - - -//Specifier speeds up the process by letting us know if this is a simple -//subset in which each region is repeated w/ same stride, or if each -//region is never repeated (EG create vs createv). Also has specifiers for -//FULL/EMPTY. -typedef struct { - int num_blocks; - int* start_offsets; - int* end_offsets; - int* num_repeats; - int stride; - int specifier; -} Fenix_Data_subset; +#include "fenix.h" int __fenix_data_subset_init(int num_blocks, Fenix_Data_subset* subset); +int __fenix_data_subset_init_empty(Fenix_Data_subset* subset); int __fenix_data_subset_create(int, int, int, int, Fenix_Data_subset *); int __fenix_data_subset_createv(int, int *, int *, Fenix_Data_subset *); -void __fenix_data_subset_deep_copy(Fenix_Data_subset* from, Fenix_Data_subset* to); -void __fenix_data_subset_merge(Fenix_Data_subset* first_subset, - Fenix_Data_subset* second_subset, Fenix_Data_subset* output); +void __fenix_data_subset_deep_copy(const Fenix_Data_subset* from, Fenix_Data_subset* to); +void __fenix_data_subset_merge(const Fenix_Data_subset* first_subset, + const Fenix_Data_subset* second_subset, Fenix_Data_subset* output); void __fenix_data_subset_merge_inplace(Fenix_Data_subset* first_subset, - Fenix_Data_subset* second_subset); -void __fenix_data_subset_copy_data(Fenix_Data_subset* ss, void* dest, + const Fenix_Data_subset* second_subset); +void __fenix_data_subset_copy_data(const Fenix_Data_subset* ss, void* dest, void* src, size_t data_type_size, size_t max_size); -int 
__fenix_data_subset_data_size(Fenix_Data_subset* ss, size_t max_size); -void* __fenix_data_subset_serialize(Fenix_Data_subset* ss, void* src, - size_t type_size, size_t max_size, size_t* output_size); -void __fenix_data_subset_deserialize(Fenix_Data_subset* ss, void* src, +int __fenix_data_subset_storage_size(const Fenix_Data_subset* ss, size_t max_size); +void __fenix_data_subset_serialize(const Fenix_Data_subset* ss, void* src, + void* dest, size_t type_size, size_t max_size, size_t output_size); +void __fenix_data_subset_deserialize(const Fenix_Data_subset* ss, void* src, void* dest, size_t max_size, size_t type_size); -void __fenix_data_subset_send(Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm); +void __fenix_data_subset_send(const Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm); void __fenix_data_subset_recv(Fenix_Data_subset* ss, int src, int tag, MPI_Comm comm); -int __fenix_data_subset_is_full(Fenix_Data_subset* ss, size_t data_length); +int __fenix_data_subset_is_full(const Fenix_Data_subset* ss, size_t data_length); int __fenix_data_subset_free(Fenix_Data_subset *); int __fenix_data_subset_delete(Fenix_Data_subset *); +size_t __fenix_data_subset_count(const Fenix_Data_subset* ss, size_t max_idx); +inline size_t __fenix_data_subset_data_size( + const Fenix_Data_subset* ss, size_t max_size +){ + return __fenix_data_subset_count(ss, max_size-1); +} #endif // FENIX_DATA_SUBSET_H diff --git a/include/fenix_data_subset.hpp b/include/fenix_data_subset.hpp new file mode 100644 index 0000000..fba9f68 --- /dev/null +++ b/include/fenix_data_subset.hpp @@ -0,0 +1,227 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract 
DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef __FENIX_DATA_SUBSET_HPP__ +#define __FENIX_DATA_SUBSET_HPP__ + +#include "fenix.h" +#include "fenix_opt.hpp" +#include "fenix_data_buffer.hpp" + +#include +#include +#include +#include +#include + +namespace Fenix { + +namespace Detail { + +struct DataRegionIterator; + +struct DataRegion { + static constexpr size_t MAX = std::numeric_limits::max(); + + DataRegion(std::pair b) + : DataRegion(b, 0, MAX) { }; + DataRegion(std::pair b, size_t m_reps, size_t m_stride) + : start(b.first), + end((b.second!=MAX && b.second+1==b.first+m_stride) ? + b.second+m_reps*m_stride : b.second), + reps((b.second==MAX || b.second+1==b.first+m_stride) ? 0 : m_reps), + stride(reps == 0 ? MAX : m_stride) + { + fenix_assert(start <= end); + fenix_assert(stride != MAX || reps == 0); + fenix_assert(reps == 0 || start+stride > end); + }; + + //Overall range of this region. + std::pair range() const; + + //Count of elements contained in this region + size_t count() const; + + bool operator==(const DataRegion& other) const; + bool operator!=(const DataRegion& other) const; + + //Order based on start + bool operator<(const DataRegion& other) const; + + //Return true if these regions intersect + bool operator&&(const DataRegion& other) const; + + //Returns intersection of two regions. Not defined when both regions are strided. + std::set operator&(const DataRegion& other) const; + + //This region w/o the overlap with other + std::set operator-(const DataRegion& other) const; + + //Get a region that is a single repetition of this one's, with bounds check + DataRegion get_rep(size_t n) const; + //As above, but multiple repetitions + DataRegion get_reps(size_t first, size_t last) const; + + //For a strided region, returns region between repetitions + //Undefined for an unstrided region. 
+ DataRegion inverted() const; + + std::optional try_merge(const DataRegion& other) const; + + std::string str() const; + + //Inclusive region bounds. + size_t start, end; + + //Number of times to repeat this after the first + size_t reps; + + //Distance between starts of each repetition + size_t stride; +}; +} // namespace Detail + +struct DataSubset { + static constexpr size_t MAX = Detail::DataRegion::MAX; + + //DataSubset(const DataSubset&) = default; + //DataSubset(DataSubset&&) = default; + //Empty + DataSubset() = default; + //[0, end] + explicit DataSubset(size_t end); + //[bounds.first, bounds.second] + DataSubset(std::pair bounds); + //[b.first, b.second], ..., [b.first+stride*(n-1), b.second+stride*(n-1)] + DataSubset(std::pair b, size_t n, size_t stride); + //[bounds[0].first, bounds[0].second], ... + DataSubset(std::vector> bounds); + //Merge two subsets + DataSubset(const DataSubset& a, const DataSubset& b); + //Create from serialized subset object + DataSubset(const DataBuffer& buf); + + DataSubset operator+(const DataSubset& other) const; + DataSubset& operator+=(const DataSubset& other); + + DataSubset operator+(const Fenix_Data_subset& other) const; + DataSubset& operator+=(const Fenix_Data_subset& other); + + DataSubset operator-(const DataSubset& other) const; + bool operator==(const DataSubset& other) const; + bool operator!=(const DataSubset& other) const; + + bool empty() const; + + //Overall range of this subset + std::pair range() const; + //Equivalent to range().first and range().second, possibly more performant + size_t start() const; + size_t end() const; + + //Count of elements in this subset from [0, max_index] + //Returns 0 if max_index==end()==MAX + size_t count(size_t max_index) const; + + //Count of elements in this subset if it were full [0, end()] + //Returns 0 if end()==MAX + size_t max_count() const; + + //Serialize this subset object into buf + //Will resize buf to fit exactly. 
+ void serialize(DataBuffer& buf) const; + + //Will reset dst to fit + void serialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst + ) const; + //If dst.size()==0, will resize dst to fit + void deserialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst + ) const; + + //If src_len == 0, will assume src is a large as needed + //Will resize dst if too small + void copy_data( + const size_t elm_size, const size_t src_len, const char* src, DataBuffer& dst + ) const; + //If dst_len == 0, will assume dst is as large as needed + void copy_data( + const size_t elm_size, const DataBuffer& src, const size_t dst_len, + char* dst + ) const; + + //Whether this subset includes the element at index idx + bool includes(size_t idx) const; + //Whether this subset includes the entire range [0, end] without gaps + bool includes_all(size_t end) const; + + //Return equivalent of regions & [0, max_index] + std::set bounded_regions(size_t max_index) const; + //As above, but regions & [start, end] + std::set bounded_regions(size_t start, size_t end) const; + + std::string str() const; + + //Individual data regions in this subset + std::set regions; + + private: + //merge immediately adjacent regions to simplify + void merge_regions(); +}; +} // namespace Fenix +#endif // __FENIX_DATA_SUBSET_HPP_ diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index f140d90..0fce316 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -62,8 +62,7 @@ #include "fenix.h" #include "fenix_opt.hpp" #include "fenix_process_recovery.hpp" - -typedef struct __fenix_data_recovery fenix_data_recovery_t; +#include "fenix_data_group.hpp" typedef struct { int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init @@ -105,9 +104,8 @@ typedef struct { MPI_Op agree_op; // Global agreement call for Fenix data recovery API MPI_Errhandler mpi_errhandler; // Our custom error handler - fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data 
Recovery Data Structure + Fenix::Data::fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure } fenix_t; extern fenix_t fenix; #endif // __FENIX_EXT_H__ - diff --git a/include/fenix_opt.hpp b/include/fenix_opt.hpp index 2fb34d4..be73adb 100644 --- a/include/fenix_opt.hpp +++ b/include/fenix_opt.hpp @@ -70,18 +70,67 @@ #include #include -#define debug_print(fmt, ...) \ - do { fprintf(stderr, "%s: %d: %s(): " fmt, __FILE__, \ - __LINE__, __func__, __VA_ARGS__); } while (0) +// FENIX_ABORT kills whole MPI job if MPI visible in current file, else just +// aborts this process +// Prefer fenix_assert or fatal_print instead of using this directly +#ifdef MPI_VERSION + #define FENIX_ABORT() \ + do { \ + int mpi_is_init; \ + MPI_Initialized(&mpi_is_init); \ + if(mpi_is_init) MPI_Abort(MPI_COMM_WORLD, 1); \ + abort(); \ + } while(0) +#else + #define FENIX_ABORT() abort() +#endif + +// Helpers needing to support printing w/o any user-supplied format args +// Supports up to 10 args +// Functions should be named base_name_s for 1 args or base_name_a otherwise +#define FN_SUFF_I(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, NAME, ...) NAME +#define FN_SUFF(...) FN_SUFF_I(__VA_ARGS__,_a,_a,_a,_a,_a,_a,_a,_a,_a,_s) +#define FN_SUFF_MERGE_IMPL(fn, suff) fn ## suff +#define FN_SUFF_MERGE(fn, suff) FN_SUFF_MERGE_IMPL(fn, suff) +#define FN_NAME(base_name, ...) FN_SUFF_MERGE(base_name, FN_SUFF(__VA_ARGS__)) + +#define TRACE_PRINT_FMT "%s:%d %s(): " +#define TRACE_PRINT_ARG __FILE__, __LINE__, __func__ + +#define traced_print_s(file, fmt) \ + fprintf(file, TRACE_PRINT_FMT fmt "\n", TRACE_PRINT_ARG) +#define traced_print_a(file, fmt, ...) \ + fprintf(file, TRACE_PRINT_FMT fmt "\n", TRACE_PRINT_ARG, __VA_ARGS__) +#define traced_print(file, ...) FN_NAME(traced_print, __VA_ARGS__)(file, __VA_ARGS__) + +#define debug_print(...) traced_print(stderr, __VA_ARGS__) +#define verbose_print(...) traced_print(stdout, __VA_ARGS__) -#define verbose_print(fmt, ...) 
\ - do { printf("%s(): " fmt, __func__, __VA_ARGS__); } while (0) +//Multi-line macro functions wrapped in do-while to maintain correct behavior +//regardless of what surrouding code is +#define fatal_print(...) \ + do { \ + traced_print(stderr, __VA_ARGS__); \ + traced_print(stderr, "Fenix aborting due to fatal error!"); \ + FENIX_ABORT(); \ + } while(0) + +#define fenix_assert_a(predicate, ...) \ + do{if( !(predicate) ){ fatal_print(__VA_ARGS__); }} while(0) +#define fenix_assert_s(predicate) \ + fenix_assert_a(predicate, "internal error, failed assertion (" #predicate ")" ); + +#ifdef NDEBUG + //Disable assertions when NDEBUG + #define fenix_assert(...) do { } while(0) +#else + #define fenix_assert(...) FN_NAME(fenix_assert, __VA_ARGS__)(__VA_ARGS__) +#endif typedef struct __fenix_debug_opt_t { int verbose = -1; } fenix_debug_opt_t; - void __fenix_init_opt(int argc, char **argv); #endif diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index 760617f..bfcced3 100644 --- a/include/fenix_process_recovery.hpp +++ b/include/fenix_process_recovery.hpp @@ -57,6 +57,7 @@ #ifndef __FENIX_PROCESS_RECOVERY__ #define __FENIX_PROCESS_RECOVERY__ +#include #include #include #include @@ -83,9 +84,11 @@ typedef struct { fenix_comm_list_elm_t *tail; } fenix_comm_list_t; -void __fenix_set_resume_mode(const std::string_view& name); +Fenix_Resume_mode get_resume_mode(const std::string_view& name); -void __fenix_set_unhandled_mode(const std::string_view& name); +Fenix_Unhandled_mode get_unhandled_mode(const std::string_view& name); + +int fenix_preinit(const Fenix::Args::FenixInitArgs& args, jmp_buf* jump_env = nullptr); int __fenix_create_new_world(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0256344..2f67a07 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,32 +15,32 @@ configure_file (${CMAKE_SOURCE_DIR}/include/fenix-config.h.in FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*) set (Fenix_SOURCES -fenix.cpp 
-fenix_opt.cpp -fenix_process_recovery.cpp -fenix_util.cpp -fenix_data_recovery.cpp -fenix_data_group.cpp -fenix_data_policy.cpp -fenix_data_policy_in_memory_raid.cpp -fenix_data_member.cpp -fenix_data_subset.cpp -fenix_callbacks.cpp -globals.cpp + fenix.cpp + fenix_opt.cpp + fenix_process_recovery.cpp + fenix_util.cpp + fenix_data_recovery.cpp + fenix_data_group.cpp + fenix_data_policy.cpp + fenix_data_policy_in_memory_raid.cpp + fenix_data_member.cpp + fenix_data_subset.cpp + fenix_callbacks.cpp + globals.cpp ) -add_library( fenix STATIC ${Fenix_SOURCES}) +add_library(fenix STATIC ${Fenix_SOURCES}) target_compile_features(fenix PRIVATE cxx_std_17) target_link_libraries(fenix PUBLIC MPI::MPI_CXX) -target_include_directories(fenix - PUBLIC +target_include_directories(fenix + PUBLIC $ $ $ - PRIVATE "${CMAKE_SOURCE_DIR}/src") + PRIVATE "${CMAKE_SOURCE_DIR}/src") install(TARGETS fenix EXPORT fenix diff --git a/src/fenix.cpp b/src/fenix.cpp index b4faa96..d363b7e 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -59,9 +59,18 @@ #include "fenix_util.hpp" #include "fenix_ext.hpp" #include "fenix.hpp" +#include "fenix_data_subset.hpp" -const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_FULL}; -const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_EMPTY}; +using namespace Fenix; +using namespace Fenix::Data; + +namespace Fenix::Data { +const DataSubset FENIX_SUBSET_FULL = {{0, Fenix::DataSubset::MAX}}; +const DataSubset FENIX_SUBSET_EMPTY = {}; +} + +const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = { new DataSubset(DataSubset::MAX) }; +const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = { new DataSubset() }; int Fenix_Callback_register(std::function callback){ return __fenix_callback_register(callback); @@ -108,20 +117,20 @@ int Fenix_Data_test(Fenix_Request request, int *flag) { return __fenix_data_test(request, flag); } -int Fenix_Data_member_store(int group_id, int member_id, Fenix_Data_subset 
subset_specifier) { - return __fenix_member_store(group_id, member_id, subset_specifier); +int Fenix_Data_member_store(int group_id, int member_id, const Fenix_Data_subset subset) { + return member_store(group_id, member_id, *(DataSubset*)subset.impl); } -int Fenix_Data_member_storev(int group_id, int member_id, Fenix_Data_subset subset_specifier) { - return 0; +int Fenix_Data_member_storev(int group_id, int member_id, const Fenix_Data_subset subset) { + return member_storev(group_id, member_id, *(DataSubset*)subset.impl); } -int Fenix_Data_member_istore(int group_id, int member_id, Fenix_Data_subset subset_specifier, Fenix_Request *request) { - return 0; +int Fenix_Data_member_istore(int group_id, int member_id, const Fenix_Data_subset subset, Fenix_Request *request) { + return member_istore(group_id, member_id, *(DataSubset*)subset.impl, request); } -int Fenix_Data_member_istorev(int group_id, int member_id, Fenix_Data_subset subset_specifier, Fenix_Request *request) { - return 0; +int Fenix_Data_member_istorev(int group_id, int member_id, const Fenix_Data_subset subset, Fenix_Request *request) { + return member_istorev(group_id, member_id, *(DataSubset*)subset.impl, request); } int Fenix_Data_commit(int group_id, int *time_stamp) { @@ -137,11 +146,29 @@ int Fenix_Data_barrier(int group_id) { } int Fenix_Data_member_restore(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found) { - return __fenix_member_restore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); + DataSubset* s = new DataSubset(); + int ret = member_restore( + group_id, member_id, target_buffer, max_count, time_stamp, *s + ); + if(data_found == nullptr){ + delete s; + } else { + data_found->impl = s; + } + return ret; } int Fenix_Data_member_lrestore(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found) { - return __fenix_member_lrestore(group_id, member_id, 
target_buffer, max_count, time_stamp, data_found); + DataSubset* s = new DataSubset(); + int ret = member_lrestore( + group_id, member_id, target_buffer, max_count, time_stamp, *s + ); + if(data_found == nullptr){ + delete s; + } else { + data_found->impl = s; + } + return ret; } int Fenix_Data_member_restore_from_rank(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found, int source_rank) { @@ -219,3 +246,71 @@ int Fenix_check_cancelled(MPI_Request *request, MPI_Status *status){ int Fenix_Process_detect_failures(int do_recovery){ return __fenix_detect_failures(do_recovery); } + +Fenix_Rank_role Fenix_get_role(){ + return (Fenix_Rank_role) fenix.role; +} + +int Fenix_get_error(){ + return fenix.repair_result; +} + +namespace Fenix { + +void init(const Args::FenixInitArgs args){ + fenix_assert(args.resume_mode != JUMP, "Must use Fenix_Init to use the JUMP resume mode"); + + fenix_preinit(args); + __fenix_postinit(); +} + +void throw_exception(){ + throw CommException(*fenix.user_world, *fenix.ret_error); +} + +} // namespace Fenix + +namespace Fenix::Data { + +int member_store(int group_id, int member_id, const DataSubset& subset){ + return __fenix_member_store(group_id, member_id, subset); +} + +int member_storev(int group_id, int member_id, const DataSubset& subset){ + return __fenix_member_storev(group_id, member_id, subset); +} + +int member_istore( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +){ + fatal_print("unimplemented"); + return 0; +} + +int member_istorev( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +){ + fatal_print("unimplemented"); + return 0; +} + +int member_restore( + int group_id, int member_id, void *target_buffer, int max_count, + int time_stamp, DataSubset& data_found +) { + data_found = {}; + return __fenix_member_restore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); +} + +int 
member_lrestore( + int group_id, int member_id, void *target_buffer, int max_count, + int time_stamp, DataSubset& data_found +) { + data_found = {}; + return __fenix_member_lrestore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); +} + +} // namespace Fenix::Data + diff --git a/src/fenix_data_group.cpp b/src/fenix_data_group.cpp index 0b41648..197915f 100644 --- a/src/fenix_data_group.cpp +++ b/src/fenix_data_group.cpp @@ -54,6 +54,8 @@ //@HEADER */ +#include + #include "mpi.h" #include "fenix-config.h" #include "fenix_ext.hpp" @@ -63,6 +65,10 @@ namespace Fenix::Data { +group_iterator find_group(int id){ + return find_group(id, fenix.data_recovery); +} + group_iterator find_group(int id, fenix_data_recovery_t* dr){ int index = __fenix_search_groupid(id, dr); if(index == -1){ @@ -72,17 +78,12 @@ group_iterator find_group(int id, fenix_data_recovery_t* dr){ return {index, dr->group[index]}; } -} //end namespace Fenix::Data - -using namespace Fenix::Data; member_iterator fenix_group_t::search_member(int id){ - for(int i = 0; i < members.size(); i++){ - if(members[i].memberid == id){ - return {i, &(members[i])}; - } - } - return {-1, nullptr}; + auto iter = members.find(id); + if(iter == members.end()) return {-1, nullptr}; + assert(iter->first == iter->second.memberid); + return {std::distance(members.begin(), iter), &(iter->second)}; } member_iterator fenix_group_t::find_member(int id){ @@ -91,9 +92,6 @@ member_iterator fenix_group_t::find_member(int id){ return it; } -/** - * @brief - */ fenix_data_recovery_t * __fenix_data_recovery_init() { fenix_data_recovery_t *data_recovery = (fenix_data_recovery_t *) s_calloc(1, sizeof(fenix_data_recovery_t)); @@ -125,10 +123,10 @@ int __fenix_member_delete(int groupid, int memberid) { member_index); } - int retval = group->vtbl.member_delete(group, memberid); + int retval = group->member_delete(memberid); if(retval == FENIX_SUCCESS){ - group->members.erase(group->members.begin()+member_index); + 
group->members.erase(memberid); } if (fenix.options.verbose == 38) { @@ -154,7 +152,7 @@ int __fenix_group_delete_direct(fenix_group_t* group){ //knowing how many members there are during its own deletion //process. - return group->vtbl.group_delete(group); + return group->group_delete(); } int __fenix_data_recovery_remove_group(int group_index){ @@ -273,3 +271,5 @@ int __fenix_find_next_group_position( fenix_data_recovery_t *data_recovery ) { __fenix_ensure_data_recovery_capacity(data_recovery); return data_recovery->count; } + +} //end namespace Fenix::Data diff --git a/src/fenix_data_member.cpp b/src/fenix_data_member.cpp index b5a34da..b17e7a0 100644 --- a/src/fenix_data_member.cpp +++ b/src/fenix_data_member.cpp @@ -62,7 +62,16 @@ #include "fenix_data_packet.hpp" -using namespace Fenix::Data; +namespace Fenix::Data { + +fenix_member_entry_packet_t +fenix_member_entry_t::to_packet(){ + fenix_member_entry_packet_t to_ret; + to_ret.memberid = memberid; + to_ret.datatype_size = datatype_size; + to_ret.current_count = current_count; + return to_ret; +} /** * @brief @@ -79,12 +88,12 @@ fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* group, fenix_member_entry_t mentry; mentry.memberid = memberid; mentry.state = OCCUPIED; - mentry.user_data = data; + mentry.user_data = (char*)data; mentry.current_count = count; mentry.datatype_size = datatype_size; - group->members.push_back(mentry); + group->members[memberid] = mentry; - return &group->members.back(); + return &group->members[memberid]; } int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank){ @@ -124,5 +133,7 @@ int __fenix_data_member_recv_metadata(int groupid, int src_rank, */ void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, enum states mystatus) { - group->members.clear(); + group->members.clear(); } + +} //namespace Fenix::Data diff --git a/src/fenix_data_policy.cpp b/src/fenix_data_policy.cpp index 903bf54..f0de9c4 100644 --- 
a/src/fenix_data_policy.cpp +++ b/src/fenix_data_policy.cpp @@ -61,9 +61,8 @@ #include "fenix_opt.hpp" #include "fenix.h" -/** - *@brief - **/ +namespace Fenix::Data { + int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag){ @@ -71,7 +70,7 @@ int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, switch (policy_name){ case FENIX_DATA_POLICY_IN_MEMORY_RAID: - __fenix_policy_in_memory_raid_get_group(group, comm, timestart, + IMR::__fenix_policy_in_memory_raid_get_group(group, comm, timestart, depth, policy_value, flag); retval = FENIX_SUCCESS; break; @@ -83,3 +82,5 @@ int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, return retval; } + +} diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 940c2c6..c813881 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -54,6 +54,15 @@ //@HEADER */ +#include +#include +#include +#include +#include +#include +#include +#include + #include #include "fenix.h" #include "fenix_ext.hpp" @@ -63,6 +72,7 @@ #include "fenix_data_policy.hpp" #include "fenix_data_group.hpp" #include "fenix_data_member.hpp" +#include "fenix_data_policy_in_memory_raid.hpp" #define __FENIX_IMR_DEFAULT_MENTRY_NUM 10 #define __FENIX_IMR_NO_MEMBERS 16000 @@ -70,1184 +80,858 @@ #define STORE_PAYLOAD_TAG 2004 -int __imr_group_delete(fenix_group_t* group); -int __imr_member_create(fenix_group_t* group, fenix_member_entry_t* mentry); -int __imr_member_delete(fenix_group_t* group, int member_id); -int __imr_get_redundant_policy(fenix_group_t*, int* policy_name, - void* policy_value, int* flag); -int __imr_member_store(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); -int __imr_member_storev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); -int __imr_member_istore(fenix_group_t* group, int member_id, - 
Fenix_Data_subset subset_specifier, Fenix_Request *request); -int __imr_member_istorev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); -int __imr_commit(fenix_group_t* group); -int __imr_snapshot_delete(fenix_group_t* group, int time_stamp); -int __imr_barrier(fenix_group_t* group); -int __imr_member_restore(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); -int __imr_member_lrestore(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); -int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - int source_rank); -int __imr_member_get_attribute(fenix_group_t* group, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag, int sourcerank); -int __imr_member_set_attribute(fenix_group_t* group, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag); -int __imr_get_number_of_snapshots(fenix_group_t* group, - int* number_of_snapshots); -int __imr_get_snapshot_at_position(fenix_group_t* group, int position, - int* time_stamp); -int __imr_reinit(fenix_group_t* group, int* flag); - -typedef struct __fenix_imr_mentry{ - void** data; - Fenix_Data_subset* data_regions; - int* timestamp; - int current_head; - int memberid; -} fenix_imr_mentry_t; - -typedef struct __fenix_imr_group{ - fenix_group_t base; - int raid_mode; - int rank_separation; - int* partners; - int set_size; - MPI_Comm set_comm; - int entries_size; - int entries_count; - fenix_imr_mentry_t* entries; - int num_snapshots; - int* timestamps; -} fenix_imr_group_t; - -typedef struct __fenix_imr_undo_log{ - int groupid, memberid; -} fenix_imr_undo_log_t; - -void __imr_sync_timestamps(fenix_imr_group_t* group); - -void __imr_undo_restore(MPI_Comm comm, int err, void* data){ - fenix_imr_undo_log_t* 
undo_log = (fenix_imr_undo_log_t*)data; - - Fenix_Data_member_delete(undo_log->groupid, undo_log->memberid); - - free(data); - Fenix_Callback_pop(); //Should be this callback itself. -} - +namespace Fenix::Data::IMR { -void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, +void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, void* policy_value, int* flag){ - *group = (fenix_group_t *) new fenix_imr_group_t; - fenix_imr_group_t *new_group = (fenix_imr_group_t *)(*group); - new_group->base.vtbl.group_delete = *__imr_group_delete; - new_group->base.vtbl.member_create = *__imr_member_create; - new_group->base.vtbl.member_delete = *__imr_member_delete; - new_group->base.vtbl.get_redundant_policy = *__imr_get_redundant_policy; - new_group->base.vtbl.member_store = *__imr_member_store; - new_group->base.vtbl.member_storev = *__imr_member_storev; - new_group->base.vtbl.member_istore = *__imr_member_istore; - new_group->base.vtbl.member_istorev = *__imr_member_istorev; - new_group->base.vtbl.commit = *__imr_commit; - new_group->base.vtbl.snapshot_delete = *__imr_snapshot_delete; - new_group->base.vtbl.barrier = *__imr_barrier; - new_group->base.vtbl.member_restore = *__imr_member_restore; - new_group->base.vtbl.member_lrestore = *__imr_member_lrestore; - new_group->base.vtbl.member_restore_from_rank = *__imr_member_restore_from_rank; - new_group->base.vtbl.member_get_attribute = *__imr_member_get_attribute; - new_group->base.vtbl.member_set_attribute = *__imr_member_set_attribute; - new_group->base.vtbl.get_number_of_snapshots = *__imr_get_number_of_snapshots; - new_group->base.vtbl.get_snapshot_at_position = *__imr_get_snapshot_at_position; - new_group->base.vtbl.reinit = *__imr_reinit; - - int* policy_vals = (int*)policy_value; - new_group->raid_mode = policy_vals[0]; - new_group->rank_separation = policy_vals[1]; + *group = new Group(comm, timestart, depth, (int*)policy_value, flag); +}; 
- int my_rank, comm_size; - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &my_rank); - - if(new_group->raid_mode == 1){ - //Set up the person who's data I am storing as partner 0 - //Set up the person who is storing my data as partner 1 - new_group->partners = (int*) malloc(sizeof(int) * 2); - - //odd-sized groups take some extra handling. - bool isOdd = ((comm_size%2) != 0); - - int remaining_size = comm_size; - if(isOdd) remaining_size -= 3; - - //We want to form groups of rank_separation*2 to pair within - int n_full_groups = remaining_size / (new_group->rank_separation*2); - - //We don't always get what we want though, one group may need to be smaller. - int mini_group_size = (remaining_size - n_full_groups*new_group->rank_separation*2)/2; - +Entry::Entry(int size, int max_count) + : elm_size(size), elm_max_count(max_count) { + buf.reserve(size * max_count); +} - int start_rank = mini_group_size + (isOdd?1:0); - int mid_rank = comm_size/2; //Only used when isOdd - - int end_mini_group_start = comm_size-mini_group_size-(isOdd?1:0); - int start_mini_group_start = (isOdd?1:0); - bool in_start_mini=false, in_end_mini=false; +Entry::Entry(Entry&& other){ + *this = std::move(other); +} - if(my_rank >= start_mini_group_start && my_rank < start_mini_group_start+mini_group_size){ - in_start_mini = true; - } else if(my_rank >= end_mini_group_start && my_rank < comm_size-(isOdd?1:0)){ - in_end_mini = true; - } +Entry& Entry::operator=(Entry&& other){ + timestamp = std::exchange(other.timestamp, -2); - //Allocate the "normal" ranks - if(my_rank >= start_rank && my_rank < end_mini_group_start && (!isOdd || my_rank != mid_rank)){ - //"effective" rank for determining which group I'm in and if I look forward or backward for a partner. 
- int e_rank = my_rank - start_rank; - if(isOdd && my_rank > mid_rank) --e_rank; //We skip the middle rank when isOdd + region = std::move(other.region); + partner_region = std::move(other.partner_region); + buf = std::move(other.buf); + partner_buf = std::move(other.partner_buf); + elm_size = other.elm_size; + elm_max_count = other.elm_max_count; + return *this; +} - int my_partner; - if(((e_rank/new_group->rank_separation)%2) == 0){ - //Look forward for partner. - my_partner = my_rank + new_group->rank_separation; - if(isOdd && my_rank < mid_rank && my_partner >= mid_rank) ++my_partner; - } else { - my_partner = my_rank - new_group->rank_separation; - if(isOdd && my_rank > mid_rank && my_partner <= mid_rank) --my_partner; - } +char* Entry::data(){ return buf.data(); } +void Entry::resize(int size){ buf.resize(size); } +int Entry::size(){ return buf.size(); } - new_group->partners[0] = my_partner; - new_group->partners[1] = my_partner; - } else if(in_start_mini) { - int e_rank = my_rank - start_mini_group_start; - int partner = end_mini_group_start + e_rank; - new_group->partners[0] = partner; - new_group->partners[1] = partner; - } else if(in_end_mini) { - int e_rank = my_rank - end_mini_group_start; - int partner = start_mini_group_start + e_rank; - new_group->partners[0] = partner; - new_group->partners[1] = partner; - } else { //Only things left are the three ranks that must be paired to handle odd-sized comms - if(my_rank == 0){ - new_group->partners[0] = comm_size-1; - new_group->partners[1] = mid_rank; - } else if(my_rank == mid_rank){ - new_group->partners[0] = 0; - new_group->partners[1] = comm_size-1; - } else if(my_rank == comm_size-1){ - new_group->partners[0] = mid_rank; - new_group->partners[1] = 0; - } else { - fprintf(stderr, "FENIX_IMR Fatal error: Rank <%d> no partner assigned, this is a bug in IMR!\n", my_rank); - *flag = FENIX_ERROR_GROUP_CREATE; - return; - } - } +char* Entry::partner_data(){ return partner_buf.data(); } +void
Entry::partner_resize(int size){ partner_buf.resize(size); } +int Entry::partner_size(){ return partner_buf.size(); } +void Entry::add_and_fit(const DataSubset& subset){ + region += subset; - } else if(new_group->raid_mode == 5){ - new_group->set_size = policy_vals[2]; - new_group->partners = (int*) malloc(sizeof(int) * new_group->set_size); + int new_count = elm_max_count; + if(!new_count) new_count = region.max_count(); + if(!new_count) new_count = subset.max_count(); - //User is responsible for giving values that "make sense" for set size and rank separation given a comm size. - int my_set_pos = (my_rank/new_group->rank_separation)%new_group->set_size; - for(int index = 0; index < new_group->set_size; index++){ - new_group->partners[index] = (comm_size + my_rank - (new_group->rank_separation * (my_set_pos-index)))%comm_size; - } + int new_size = new_count*elm_size; + if(new_size > buf.size()) buf.resize(new_size); +} - //Build a comm to use for all of the set's reductions we'll need to do for RAID 5.
- MPI_Group comm_group, set_group; - MPI_Comm_group(comm, &comm_group); - MPI_Group_incl(comm_group, new_group->set_size, new_group->partners, &set_group); - MPI_Comm_create_group(comm, set_group, 0, &(new_group->set_comm)); +void Entry::partner_add_and_fit(const DataSubset& subset){ + partner_region += subset; - } + int new_count = elm_max_count; + if(!new_count) new_count = partner_region.max_count(); + if(!new_count) new_count = subset.max_count(); - new_group->entries_size = __FENIX_IMR_DEFAULT_MENTRY_NUM; - new_group->entries_count = 0; - new_group->entries = - (fenix_imr_mentry_t*) malloc(sizeof(fenix_imr_mentry_t) * __FENIX_IMR_DEFAULT_MENTRY_NUM); - new_group->num_snapshots = 0; - new_group->timestamps = (int*)malloc(sizeof(int)*depth); - - new_group->base.comm = comm; - new_group->base.current_rank = my_rank; - __imr_sync_timestamps(new_group); - *flag = FENIX_SUCCESS; + int new_size = new_count*elm_size; + if(new_size > partner_buf.size()) partner_buf.resize(new_size); } -//Sets mentry to point to the right index for a given memberid -//If there are no members, the mentry pointer will be invalid and __FENIX_IMR_NO_MEMBERS will be returned. -//If the given memberid is not found, points to the closest and returns anything but FENIX_SUCCESS. -int __imr_find_mentry(fenix_imr_group_t* group, int memberid, fenix_imr_mentry_t** mentry){ - //List is sorted by member id, do binary search.
- int retval = -1; - unsigned lower_bound = 0; - unsigned upper_bound = group->entries_count - 1; - - - if(group->entries_count == 0){ - upper_bound = 0; - retval = __FENIX_IMR_NO_MEMBERS; - } +void Entry::reset(){ + timestamp = -2; - while(lower_bound != upper_bound){ - unsigned to_check = (lower_bound + upper_bound)>>1; - - if(group->entries[to_check].memberid == memberid){ - lower_bound = upper_bound = to_check; - } else if(group->entries[to_check].memberid < memberid){ - lower_bound = to_check + 1; - if(lower_bound > upper_bound) lower_bound = upper_bound; - } else { - upper_bound = to_check - 1; - if(lower_bound > upper_bound) upper_bound = lower_bound; - } - } + buf.clear(); + partner_buf.clear(); + + region = {}; + partner_region = {}; +} - *mentry = group->entries + lower_bound; - if(retval != __FENIX_IMR_NO_MEMBERS && (*mentry)->memberid == memberid){ - retval = FENIX_SUCCESS; +Member::Member(fenix_member_entry_t& my_mentry, Group& my_group) + : mentry(my_mentry), group(my_group), send_buf(group.send_buf), + recv_buf(group.recv_buf) +{ + for(int i = 0; i < group.depth+2; i++){ + entries.emplace_back(mentry.datatype_size, mentry.current_count); } - return retval; } -void __imr_alloc_data_region(void** region, int raid_mode, int local_data_size, int set_size){ - if(raid_mode == 1){ - *region = (void*) malloc(2*local_data_size); - } else if(raid_mode == 5){ - //We need space for our own local data, as well as space for the parity data - //We add two just in case the data size isn't evenly divisible by set_size-1 - // 3 is needed because making the parity one larger on some nodes requires - // extra bits of "data" on the other nodes - *region = (void*) malloc(local_data_size + local_data_size/(set_size - 1) + 3); - } else { - debug_print("Error: raid mode <%d> not supported\n", raid_mode); - } +BuddyMember::BuddyMember(fenix_member_entry_t& my_mentry, Group& my_group) + : Member(my_mentry, my_group) { + for(auto& entry : entries) + 
entry.partner_resize(mentry.datatype_size*mentry.current_count); } -int __imr_member_create(fenix_group_t* g, fenix_member_entry_t* mentry){ - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - int retval = -1; +ParityMember::ParityMember(fenix_member_entry_t& my_mentry, Group& my_group) + : Member(my_mentry, my_group) { + int data_len = (mentry.datatype_size*mentry.current_count); + int parity_len = data_len / (group.set_size-1); - fenix_imr_mentry_t* closest_imr_mentry; - int found_memberid = __imr_find_mentry(group, mentry->memberid, &closest_imr_mentry); + int remainder = data_len % (group.set_size-1); + if(remainder) remainder++; + if(group.set_rank < remainder) parity_len++; - if(found_memberid == FENIX_SUCCESS){ - debug_print("Error Fenix_Data_member_create: member_id <%d> already exists in this policy\n", - mentry->memberid); - } else { - //Double check that we have room for the member. - if(group->entries_count >= group->entries_size){ - group->entries = (fenix_imr_mentry_t*) s_realloc(group->entries, - group->entries_size * 2 * sizeof(fenix_imr_mentry_t)); - group->entries_size *= 2; - } - - fenix_imr_mentry_t* new_imr_mentry; - if(found_memberid == __FENIX_IMR_NO_MEMBERS){ - //This is the first member, it goes at the beginning. - new_imr_mentry = group->entries; - } else { - int closest_index = closest_imr_mentry - group->entries; - //Do we want to place this new member before or after - //the closest member? - if(mentry->memberid > closest_imr_mentry->memberid){ - //Move all entries after the closest to one farther to right, b/c I belong - //right after the closest. - memmove(group->entries+closest_index +1, group->entries+closest_index +2, - group->entries_size - closest_index+1); - new_imr_mentry = group->entries+closest_index+1; - } else { - //Move all entries starting w/ closest to one farther to right, b/c I belong - //right before the closest.
- memmove(group->entries+closest_index, group->entries+closest_index +1, - group->entries_size - closest_index); - new_imr_mentry = group->entries+closest_index; - } - } + for(auto& entry : entries) + entry.partner_resize(parity_len); +} - //Now I've got the location to store this member, - //so I just need to actually fill in the data. - new_imr_mentry->current_head = 0; - new_imr_mentry->memberid = mentry->memberid; - - new_imr_mentry->data = (void**) malloc( (group->base.depth+2) * sizeof(void*)); - int local_data_size = mentry->datatype_size * mentry->current_count; - new_imr_mentry->data_regions = - (Fenix_Data_subset *)malloc(sizeof(Fenix_Data_subset) * (group->base.depth+2) ); - new_imr_mentry->timestamp = (int*) malloc(sizeof(int) * (group->base.depth + 2)); - - for(int i = 0; i < group->base.depth + 2; i++){ - __imr_alloc_data_region(new_imr_mentry->data + i, group->raid_mode, local_data_size, group->set_size); - //Initialize to smallest # blocks allowed. - __fenix_data_subset_init(1, new_imr_mentry->data_regions + i); - new_imr_mentry->data_regions[i].specifier = __FENIX_SUBSET_EMPTY; +bool Member::snapshot_delete(int timestamp){ + bool found = false; + for(int i = (int)entries.size()-1; i >= 0; i--){ + if(entries[i].timestamp == timestamp){ + assert(!found); + found = true; + entries[i].reset(); + } + //Move deleted snapshot to front + if(found && i > 0){ + std::swap(entries[i], entries[i-1]); } - - group->entries_count++; - - retval = FENIX_SUCCESS; } + return found; +} - return retval; +int Member::storev(const DataSubset& subset){ + Entry& e = entries.back(); + e.add_and_fit(subset); + subset.copy_data(e.elm_size, e.elm_max_count, mentry.user_data, e.buf); + return this->storev_impl(subset); } -void __imr_member_free(fenix_imr_mentry_t* mentry, int depth){ - //Start by clearing out the mentry's data pointers.
- for(int i = 0; i < depth + 2; i++){ - __fenix_data_subset_free(mentry->data_regions + i); - free(mentry->data[i]); - } +int BuddyMember::exch( + const DataSubset& subset, const DataSubset& partner_subset +){ + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 0 : rank+1; + + Entry& e = entries.back(); + e.partner_add_and_fit(partner_subset); + + int recv_count = partner_subset.count(e.elm_max_count-1); + recv_buf.reset(e.elm_size*recv_count); - free(mentry->data); - free(mentry->data_regions); - free(mentry->timestamp); + subset.serialize_data(e.elm_size, e.buf, send_buf); + + MPI_Sendrecv( + send_buf.data(), send_buf.size(), MPI_BYTE, right, 0, + recv_buf.data(), recv_buf.size(), MPI_BYTE, left, 0, + group.set_comm, MPI_STATUS_IGNORE + ); + + partner_subset.deserialize_data( + e.elm_size, recv_buf, e.partner_buf + ); + return FENIX_SUCCESS; } -int __imr_member_delete(fenix_group_t* g, int member_id){ - int retval = FENIX_SUCCESS; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - //Find the member first - fenix_imr_mentry_t *mentry; - int found_member = __imr_find_mentry(group, member_id, &mentry); - - if(found_member != FENIX_SUCCESS){ - debug_print("ERROR Fenix_Data_member_delete: member_id <%d> does not exist!\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - - //Free all of the pointers in the mentry - __imr_member_free(mentry, group->base.depth); +int BuddyMember::storev_impl(const DataSubset& subset){ + //My partner ranks (within set_comm) + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 0 : rank+1; - //Now shift all the subsequent mentries back one, unless I'm already the last one. 
- int member_index = mentry - group->entries; - if(member_index != (group->entries_count-1) ){ - memmove(mentry, mentry+1, group->entries_count - 1 - member_index); - } + DataBuffer send_buf, recv_buf; + subset.serialize(send_buf); - group->entries_count--; + for(int i = 0; i < group.set_size; i++){ + if(i == rank) send_buf.send(right, 0, group.set_comm); + if(i == left) recv_buf.recv_unknown(left, 0, group.set_comm); } - return retval; + + return exch(subset, DataSubset(recv_buf)); } +int Member::store(const DataSubset& subset){ + Entry& e = entries.back(); + e.add_and_fit(subset); + subset.copy_data(e.elm_size, e.elm_max_count, mentry.user_data, e.buf); + return this->store_impl(subset); +} +int BuddyMember::store_impl(const DataSubset& subset){ + return exch(subset, subset); +} -int __imr_member_store(fenix_group_t* g, int member_id, - Fenix_Data_subset subset_specifier){ - int retval = -1; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - fenix_imr_mentry_t* mentry; - int found_member = __imr_find_mentry(group, member_id, &mentry); - - fenix_member_entry_t* member_data; - //Shouldn't need to check for failure to find the member, that should be done before - //calling - int member_data_index = __fenix_search_memberid(&group->base, member_id); - member_data = &(group->base.members[member_data_index]); - - if(found_member != FENIX_SUCCESS){ - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist on rank <%d>!\n", - member_id, g->current_rank); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - //Copy my own data, trade data with partner, update data region - //Store my data at the beginning of the member's buffer, resiliency data after that. 
- __fenix_data_subset_copy_data(&subset_specifier, mentry->data[mentry->current_head], - member_data->user_data, member_data->datatype_size, member_data->current_count); - - if(group->raid_mode == 1){ - - size_t serialized_size; - void* serialized = __fenix_data_subset_serialize(&subset_specifier, - mentry->data[mentry->current_head], member_data->datatype_size, - member_data->current_count, &serialized_size); - - void* recv_buf = malloc(serialized_size * member_data->datatype_size); - - MPI_Sendrecv(serialized, serialized_size * member_data->datatype_size, MPI_BYTE, - group->partners[1], group->base.groupid ^ STORE_PAYLOAD_TAG, - recv_buf, serialized_size * member_data->datatype_size, MPI_BYTE, - group->partners[0], group->base.groupid ^ STORE_PAYLOAD_TAG, - group->base.comm, NULL); - - //Expand the serialized data out and store into the partner's portion of this data entry. - __fenix_data_subset_deserialize(&subset_specifier, recv_buf, - ((uint8_t*)mentry->data[mentry->current_head]) + member_data->datatype_size*member_data->current_count, - member_data->current_count, member_data->datatype_size); - - free(recv_buf); - free(serialized); - - retval = FENIX_SUCCESS; - } else if(group->raid_mode == 5){ - //TODO: Try to optimize for partial commits - currently does parity on the whole region regardless of commit area. - //TODO: I'm not sure if this is the best way to do this - could be a bottleneck if this is unoptimized since this - // could be running on a lot of data. - - //Why does this do it this way? 
- //In order to do recovery on a given block of data, we need to be missing only 1 of: - // all of the data in the corresponding blocks and the parity for those blocks - //Standard RAID does this by having one disk store parity for a given block instead of data, but this assumes - // that there is no benefit to data locality - in our case we want each node to have a local copy of its own - // data, preferably in a single (virtually) continuous memory range for data movement optimization. So we'll - // store the local data, then put 1/N of the parity data at the bottom of the commit. - //The weirdness comes from the fact that a given node CANNOT contribute to the data being checked for parity which - // will be stored on itself. IE, a node cannot save both a portion of the data and the parity for that data portion - - // doing so would mean if that node fails it is as if we lost two nodes for recovery semantics, making every failure - // non-recoverable. - // This means we need to do an XOR reduction across every node but myself, then store the result on myself - this is - // a little awkward with MPI's reductions which require full comm participation and do not receive any information about - // the source of a given chunk of data (IE we can't exclude data from node X, as we want to). - //This is easily doable using MPI send/recvs, but doing it that way neglects all of the data/comm size optimizations, - // as well as any block XOR optimizations from MPI's reduction operations. - //We could do something like an alltoallv to send appropriate data to each node, then let them calculate parity info locally - // However, we have to either allocate space to hold an extra copy of the entire data size, or we overwrite our - // local buffer and have to re-distribute the data afterward. - //I think the best way to handle it will be to manipulate the XOR function. 
We will do a reduction which uses local data - // that we do not actually want involved in calculating the parity. Then, we will XOR the local data with the result - // to get the accurate parity info. - // This involves computing the XOR on an extra 2/(set_size-1)*parity_size of data, but minimizes excess memory allocation - // and network use. Scales well with higher set sizes. - int parity_size = (member_data->datatype_size * member_data->current_count)/(group->set_size - 1); - int remainder = (member_data->datatype_size * member_data->current_count)%(group->set_size - 1); - - if(remainder != 0) remainder++; - - void* data_buf = mentry->data[mentry->current_head]; - //store parity info after my data in data region. - //we always have a spare data buffer byte for rounding stuff, so store after that as well. - void* parity_buf = (void*)((char*)data_buf + member_data->datatype_size*member_data->current_count + 2); - - int my_set_rank; - MPI_Comm_rank(group->set_comm, &my_set_rank); - int offset = 0, rounding_compensator; - for(int i = 0; i < group->set_size; i++){ - //Last node is an edge case. - if((my_set_rank == group->set_size-1) && i==my_set_rank){ - offset = 0; - } - - MPI_Reduce((char*)data_buf + offset, parity_buf, parity_size + (i < remainder ? 1 : 0), MPI_BYTE, - MPI_BXOR, i, group->set_comm); - if(i != my_set_rank){ - offset += parity_size + (i < remainder ? 1 : 0); - } - } +int ParityMember::store_impl(const DataSubset& subset){ + Entry& entry = entries.back(); - //Each node has buffer which contains parity^some_local_data, so now pull parity from that. - offset = my_set_rank * parity_size + (my_set_rank < remainder ? my_set_rank : remainder); - - //As above, last node is an edge case. 
- if(my_set_rank == group->set_size - 1){ - offset = 0; - } + int parity_size = entry.size()/(group.set_size - 1); + int remainder = entry.size()%(group.set_size - 1); - //Utilize MPI's local XOR function, assuming it is more optimized than a naive implementation would be. - MPI_Reduce_local((void*)((char*)data_buf + offset), parity_buf, parity_size + (my_set_rank < remainder ? 1 : 0), - MPI_BYTE, MPI_BXOR); + //If we have any remainder, treat as if we have one more, since a rank + //storing a larger parity block wasn't able to store a larger data block, so + //all such ranks need one extra larger data block. + if(remainder) remainder++; - //Finally, each node has the right stuff. + int m_parity_size = parity_size; + if(group.set_rank is not supported yet!\n", - group->raid_mode); - retval = FENIX_ERROR_UNINITIALIZED; + if(offset+len > entry.size()){ + //Since we pretend to have an extra remainder if there is any + assert(remainder); + assert(group.set_rank >= remainder); + assert(offset+len == entry.size()+1); + offset--; + } + input = entry.data()+offset; + offset += len; } - //Make sure to update which data regions this entry contains. - __fenix_data_subset_merge_inplace(mentry->data_regions + mentry->current_head, &subset_specifier); - + MPI_Reduce( + group.set_rank == i ? MPI_IN_PLACE : input, input, len, MPI_BYTE, MPI_BXOR, i, group.set_comm + ); } - - return retval; + assert(offset == entry.size()); + return FENIX_SUCCESS; } +void Member::commit(int timestamp){ + entries.back().timestamp = timestamp; + Entry oldest = std::move(entries.front()); + entries.pop_front(); + oldest.reset(); + entries.push_back(std::move(oldest)); +} +int Member::restore(){ + //First clear out any snapshots that we have but the group doesn't.
+ auto begin = group.timestamps.begin(); + const auto end = group.timestamps.end(); + for(int entry = 0; entry < entries.size()-1; entry++){ + if(entries[entry].timestamp == -2) continue; + begin = std::lower_bound(begin, end, entries[entry].timestamp); + if(begin == end || *begin != entries[entry].timestamp) + entries[entry].reset(); + } -int __imr_member_storev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier){return 0;} -int __imr_member_istore(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request){return 0;} -int __imr_member_istorev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request){return 0;} + //Now make sure snapshots align with group's timestamps + for(int snapshot = 1; snapshot <= group.timestamps.size(); snapshot++){ + int timestamp = group.timestamps[group.timestamps.size()-snapshot]; + int target = entries.size()-snapshot-1; + int actual; + for(actual = target; actual >= 0; actual--){ + if(entries[actual].timestamp == timestamp) break; + } + if(actual == target) continue; + if(actual != -1) { + std::swap(entries[actual], entries[target]); + } else { + int free = -1; + for(int i = 0; i <= target && free == -1; i++){ + if(entries[i].timestamp == -2) free = i; + } + assert(free != -1); + std::swap(entries[free], entries[target]); + } + } + //Reset the current store buffer entry + entries.back().reset(); + return this->restore_impl(); +} +int BuddyMember::restore_impl(){ + //My partner ranks (within set_comm) + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 
0 : rank+1; + + //Data on which partners have found each snapshot + int found[3]; + int& found_here = found[rank]; + int& found_left = found[left]; + int& found_right = found[right]; + + auto e = entries.rbegin()+1; + auto ts = group.timestamps.rbegin(); + for(; ts != group.timestamps.rend(); ts++, e++){ + fenix_assert(e->timestamp == -2 || e->timestamp == *ts); + + found_here = e->timestamp != -2; + MPI_Allgather( + MPI_IN_PLACE, 1, MPI_INT, found, 1, MPI_INT, group.set_comm + ); + + int n_missing = 0; + for(int i = 0; i < group.set_size; i++) if(!found[i]) n_missing++; + if(n_missing == 0) + continue; + else if(n_missing > 1){ + if(group.set_rank == 0) + debug_print( + "WARNING Fenix_Data_member_restore: %s member %d timestamp %d unrecoverable", + group.str().c_str(), id, *ts + ); + continue; + } + + if(!found_here){ + //Fetch my data region from right partner + recv_buf.recv_unknown(right, 0, group.set_comm); + e->add_and_fit({recv_buf}); + //Fetch my data + int m_count = e->region.count(e->elm_max_count-1); + recv_buf.recv(m_count*e->elm_size, right, 0, group.set_comm); + e->region.deserialize_data(e->elm_size, recv_buf, e->buf); + + //Fetch left partner's region + recv_buf.recv_unknown(left, 0, group.set_comm); + e->partner_add_and_fit({recv_buf}); + //Fetch data + int p_count = e->partner_region.count(e->elm_max_count-1); + recv_buf.recv(p_count*e->elm_size, left, 0, group.set_comm); + e->partner_region.deserialize_data(e->elm_size, recv_buf, e->partner_buf); + + //Only update timestamp after all other data updated, to indicate + //recovery of this snapshot completed + e->timestamp = *ts; + } + if(!found_left){ + //Send partner's data region + e->partner_region.serialize(send_buf); + send_buf.send(left, 0, group.set_comm); + //Send their data + e->partner_region.serialize_data( + e->elm_size, e->partner_buf, send_buf + ); + send_buf.send(left, 0, group.set_comm); + } + if(!found_right){ + //Send my data region + e->region.serialize(send_buf); + send_buf.send(right,
0, group.set_comm); + //Send my data + e->region.serialize_data(e->elm_size, e->buf, send_buf); + send_buf.send(right, 0, group.set_comm); + } + } + return FENIX_SUCCESS; +} -int __imr_commit(fenix_group_t* g){ - //No sources of error for this one yet. - int to_return = FENIX_SUCCESS; - - fenix_imr_group_t *group = (fenix_imr_group_t*)g; +int ParityMember::restore_impl(){ + //Data on which partners have found each snapshot + std::vector found; + found.resize(group.set_size); + int found_here; - if(group->num_snapshots == group->base.depth+1){ - //Full of timestamps, remove the oldest and proceed as normal. - memcpy(group->timestamps, group->timestamps+1, group->base.depth); - group->num_snapshots--; - } - group->timestamps[group->num_snapshots++] = group->base.timestamp; - - - //For each entry id (eid) - for(int eid = 0; eid < group->entries_count; eid++){ - fenix_imr_mentry_t *mentry = &group->entries[eid]; - - if(mentry->current_head == group->base.depth + 1){ - //The entry is full, one snapshot should be shifted out. - - //Save this pointer to reuse the allocated memory - void* first_data = mentry->data[0]; - - for(int snapshot = 0; snapshot < group->base.depth + 1; snapshot++){ - //lightweight movement, just moving the pointers about. 
- mentry->data[snapshot] = mentry->data[snapshot + 1]; - __fenix_data_subset_deep_copy(mentry->data_regions + snapshot + 1, - mentry->data_regions + snapshot); - mentry->timestamp[snapshot] = mentry->timestamp[snapshot + 1]; + auto e = entries.rbegin()+1; + auto ts = group.timestamps.rbegin(); + for(; ts != group.timestamps.rend(); ts++, e++){ + fenix_assert(e->timestamp == -2 || e->timestamp == *ts); + + found_here = e->timestamp != -2; + MPI_Allgather( + &found_here, 1, MPI_INT, found.data(), 1, MPI_INT, group.set_comm + ); + + int recovering = -1; + for(int i = 0; i < group.set_size; i++){ + if(found[i]) continue; + if(recovering != -1){ + if(group.set_rank == 0) + debug_print( + "WARNING Fenix_Data_member_restore: %s member %d timestamp %d unrecoverable", + group.str(), id, *ts + ); + recovering = -1; + break; + } else { + recovering = i; } + } + if(recovering == -1) continue; + + int sender = recovering == 0 ? 1 : 0; + if(group.set_rank == sender){ + e->region.serialize(send_buf); + send_buf.send(recovering, 0, group.set_comm); + } else if(!found_here){ + recv_buf.recv_unknown(sender, 0, group.set_comm); + e->add_and_fit({recv_buf}); + } - mentry->data[group->base.depth + 1] = first_data; - mentry->data_regions[group->base.depth + 1].specifier = __FENIX_SUBSET_EMPTY; - mentry->current_head--; + //Use the same logic as store, but recovering rank is always root and + //zeroes out the local data region before participating. + int parity_size = e->size()/(group.set_size - 1); + int remainder = e->size()%(group.set_size - 1); + if(remainder) remainder++; + int m_parity_size = parity_size; + if(group.set_rankpartner_resize(m_parity_size); + + if(!found_here){ + std::memset(e->data(), 0, e->size()); + std::memset(e->partner_data(), 0, e->partner_size()); } - mentry->timestamp[mentry->current_head++] = group->base.timestamp; + int offset = 0; + for(int i = 0; i < group.set_size; i++){ + int len = i < remainder ? 
parity_size + 1 : parity_size; + char* input; + if(group.set_rank == i){ + input = e->partner_data(); + } else { + if(offset+len > e->size()) offset--; + input = e->data()+offset; + offset += len; + } + MPI_Reduce( + MPI_IN_PLACE, input, len, MPI_BYTE, MPI_BXOR, recovering, + group.set_comm + ); + } + assert(offset == e->size()); + + e->timestamp = *ts; } - return to_return; + return FENIX_SUCCESS; } +int Member::lrestore( + char* target, int max_restore, int timestamp, DataSubset& recovered +){ + //Restoring always clears the commit buffer + entries.back().reset(); -int __imr_snapshot_delete(fenix_group_t* g, int time_stamp){ - int retval = FENIX_SUCCESS; - - fenix_imr_group_t *group = (fenix_imr_group_t*)g; - - for(int entry_id = 0; entry_id < group->entries_count && retval == FENIX_SUCCESS; entry_id++){ - //Search for the timestamp in each group. Given how commits and deletes work, we know - //the snapshots are sorted by timestamp in the arrays. - fenix_imr_mentry_t mentry = group->entries[entry_id]; - - //current_head is the staging area's entry, so start before that and work backwards. - //We'll work backwards under the assumption that snapshots are likely to be deleted soon after creation. - // (Does this assumption seem valid?) 
- for(int snapshot = mentry.current_head - 1; snapshot >= 0 && retval == FENIX_SUCCESS; snapshot--){ - if(mentry.timestamp[snapshot] < time_stamp){ - retval = FENIX_ERROR_INVALID_TIMESTAMP; - - } else if(mentry.timestamp[snapshot] == time_stamp){ - void* old_data = mentry.data[snapshot]; - - for(int to_shift = snapshot; to_shift < mentry.current_head; to_shift++){ - mentry.timestamp[to_shift] = mentry.timestamp[to_shift + 1]; - __fenix_data_subset_deep_copy(mentry.data_regions + to_shift+1, - mentry.data_regions + to_shift); - mentry.data[to_shift] = mentry.data[to_shift + 1]; - } - mentry.data[mentry.current_head] = old_data; - mentry.data_regions[mentry.current_head].specifier = __FENIX_SUBSET_EMPTY; - - mentry.current_head--; + int end = 0; + if(timestamp == FENIX_TIME_STAMP_MAX){ + if(entries[entries.size()-2].timestamp >= 0){ + end = entries.size()-1; + } + } else { + for(int i = entries.size()-2; i >= 0; i--){ + if(entries[i].timestamp == timestamp){ + end = i+1; break; } } } - if(retval == FENIX_SUCCESS){ - group->num_snapshots--; + int begin = end > 0 ? end-1 : end; + if(max_restore != 0){ + for(int i = end-1; i >= 0 && !recovered.includes_all(max_restore) ; i--){ + if(entries[i].timestamp < 0) break; + begin = i; + recovered += entries[i].region; + } + } else if(begin < end) { + recovered = entries[begin].region; } - return retval; -} - + for(int i = begin; i < end && target != NULL; i++){ + Entry& e = entries[i]; + e.region.copy_data(e.elm_size, e.buf, max_restore, target); + } + if(end <= 0) return FENIX_ERROR_NODATA_FOUND; + if(max_restore != 0 && !recovered.includes_all(max_restore)) + return FENIX_WARNING_PARTIAL_RESTORE; + return FENIX_SUCCESS; +} -int __imr_barrier(fenix_group_t* group){return 0;} +Group::Group( + MPI_Comm m_comm, int timestart, int depth, int* policy, int* flag +){ + int* policy_vals = (int*)policy; + mode = policy_vals ? policy_vals[0] : 1; + rank_separation = policy_vals ? 
policy_vals[1] : __fenix_get_world_size(m_comm)/2; + comm = m_comm; -int __imr_get_number_of_snapshots(fenix_group_t* group, - int* number_of_snapshots){ - return ((fenix_imr_group_t*)group)->num_snapshots; -} + int my_rank, comm_size; + MPI_Comm_size(comm, &comm_size); + MPI_Comm_rank(comm, &my_rank); + current_rank = my_rank; -int __imr_get_snapshot_at_position(fenix_group_t* g, int position, - int* time_stamp){ - int retval = -1; + std::set partner_set; + partner_set.insert(my_rank); - fenix_imr_group_t *group = (fenix_imr_group_t*)g; + if(mode == 1){ + //odd-sized groups take some extra handling. + bool isOdd = ((comm_size%2) != 0); + + int remaining_size = comm_size; + if(isOdd) remaining_size -= 3; + + //We want to form groups of rank_separation*2 to pair within + int n_full_groups = remaining_size / (rank_separation*2); + + //We don't always get what we want though, one group may need to be + //smaller. + int mini_group_size = + (remaining_size - n_full_groups*rank_separation*2)/2; + + int start_rank = mini_group_size + (isOdd?1:0); + int mid_rank = comm_size/2; //Only used when isOdd + + int end_mini_group_start = comm_size-mini_group_size-(isOdd?1:0); + int start_mini_group_start = (isOdd?1:0); + bool in_start_mini = + my_rank >= start_mini_group_start + && my_rank < start_mini_group_start+mini_group_size; + bool in_end_mini = + my_rank >= end_mini_group_start && my_rank < comm_size-(isOdd?1:0); - if(!(position < group->num_snapshots)){ - retval = FENIX_ERROR_INVALID_POSITION; - } else { - //Each member ought to have the same snapshots, in the same order. - //If this isn't true, some other bug has occurred. Thus, we will just - //query the first member. 
- *time_stamp = group->entries[0].timestamp[group->entries[0].current_head - 1 - position]; - retval = FENIX_SUCCESS; - } - - return retval; -} + //Allocate the "normal" ranks + if(my_rank >= start_rank && my_rank < end_mini_group_start + && (!isOdd || my_rank != mid_rank)){ + //"effective" rank for determining which group I'm in and if I look + //forward or backward for a partner. + int e_rank = my_rank - start_rank; + if(isOdd && my_rank > mid_rank) + --e_rank; //We skip the middle rank when isOdd + int my_partner; + if(((e_rank/rank_separation)%2) == 0){ + //Look forward for partner. + my_partner = my_rank + rank_separation; + if(isOdd && my_rank < mid_rank && my_partner >= mid_rank) + ++my_partner; + } else { + my_partner = my_rank - rank_separation; + if(isOdd && my_rank > mid_rank && my_partner <= mid_rank) + --my_partner; + } -int __imr_member_restore(fenix_group_t* g, int member_id, - void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found){ - int retval = -1; + partner_set.insert(my_partner); + } else if(in_start_mini) { + int e_rank = my_rank - start_mini_group_start; + int partner = end_mini_group_start + e_rank; + partner_set.insert(partner); + } else if(in_end_mini) { + int e_rank = my_rank - end_mini_group_start; + int partner = start_mini_group_start + e_rank; + partner_set.insert(partner); + } else { + //Only things left are the three ranks that must be paired to handle + //odd-sized comms + partner_set.insert({0, mid_rank, comm_size-1}); + + //my_rank should be one of the inserted ranks, or something in the + //logic here is broken. + assert(partner_set.size() == 3); + } + } else if(mode == 5){ + set_size = policy_vals[2]; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - //One-time fix after a reinit. 
- if(group->base.timestamp == -1 && group->num_snapshots > 0) - group->base.timestamp = group->timestamps[group->num_snapshots-1]; + //User is responsible for giving values that "make sense" for set size and rank separation given a comm size. + int my_set_pos = (my_rank/rank_separation)%set_size; + for(int index = 0; index < set_size; index++){ + int partner = (comm_size + my_rank - + rank_separation * (my_set_pos-index) + )%comm_size; + partner_set.insert(partner); + } + } - fenix_imr_mentry_t* mentry; - //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. - int found_member = !(__imr_find_mentry(group, member_id, &mentry)); - - fenix_member_entry_t member_data; - if(found_member){ - int member_data_index = __fenix_search_memberid(&group->base, member_id); - member_data = group->base.members[member_data_index]; + partners = {partner_set.begin(), partner_set.end()}; + + //Make same MPI calls as reinit + reinit(flag); +} + +void Group::build_set_comm(){ + if(set_comm != MPI_COMM_NULL){ + MPI_Comm_free(&set_comm); + set_comm = MPI_COMM_NULL; } - int recovery_locally_possible; + MPI_Group comm_group, set_group; + MPI_Comm_group(comm, &comm_group); + MPI_Group_incl(comm_group, partners.size(), partners.data(), &set_group); + MPI_Comm_create_group(comm, set_group, 0, &(set_comm)); + + MPI_Group_free(&comm_group); + MPI_Group_free(&set_group); - fenix_imr_undo_log_t* undo_data; //Used for undoing partial restores interrupted by failures. + MPI_Comm_size(set_comm, &set_size); + MPI_Comm_rank(set_comm, &set_rank); +} - if(group->raid_mode == 1){ - int my_data_found, partner_data_found; +Member* Group::find_member(int memberid){ + auto iter = member_data.find(memberid); + if(iter != member_data.end()) return iter->second.get(); + return nullptr; +} - //We need to know if both partners found their data. - //First send to partner 0 and recv from partner 1, then flip. 
- MPI_Sendrecv(&found_member, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, - &my_data_found, 1, MPI_INT, group->partners[1], PARTNER_STATUS_TAG, - group->base.comm, NULL); - MPI_Sendrecv(&found_member, 1, MPI_INT, group->partners[1], PARTNER_STATUS_TAG, - &partner_data_found, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, - group->base.comm, NULL); - - - if(found_member && partner_data_found && my_data_found){ - //I have my data, and the person who's data I am backing up has theirs. We're good to go. - retval = FENIX_SUCCESS; - } else if (!found_member && (!my_data_found || !partner_data_found)){ - //I lost my data, and my partner doesn't have a copy for me to restore from. - debug_print("ERROR Fenix_Data_member_restore: member_id <%d> does not exist at <%d> or partner(s) <%d> <%d>\n", - member_id, group->base.current_rank, group->partners[0], group->partners[1]); - - retval = FENIX_ERROR_INVALID_MEMBERID; - } else if(found_member){ - //My partner(s) need info on this member. This policy does nothing special w/ extra input params, so - //I can just send the basic member metadata. 
- if(!partner_data_found) - __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[0]); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - //send data region info next - if(!partner_data_found) - __fenix_data_subset_send(mentry->data_regions + snapshot, group->partners[0], - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); - - size_t size; - void* toSend; - //send my data, to maintain resiliency on my data - if(!my_data_found){ - toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - mentry->data[snapshot], member_data.datatype_size, member_data.current_count, - &size); - MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[1], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); - free(toSend); - } - - //send their data - if(!partner_data_found){ - toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - ((char*)mentry->data[snapshot]) + member_data.datatype_size*member_data.current_count, - member_data.datatype_size, member_data.current_count, &size); - MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); - free(toSend); - } - - } +std::string Group::str(){ + std::stringstream ss; + ss << "Group " << groupid << " set "; + ss << "[" << partners[0]; + for(int i=1; ibase.groupid, group->partners[1], &packet); - - //We remake the new member just like the user would. - __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, - packet.datatype_size); - - //Mark the member for deletion if another failure interrupts recovering fully. 
- undo_data = (fenix_imr_undo_log_t*)malloc(sizeof(fenix_imr_undo_log_t)); - undo_data->groupid = group->base.groupid; - undo_data->memberid = member_id; - Fenix_Callback_register(__imr_undo_restore, (void*)undo_data); - - __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(&group->base, member_id); - member_data = group->base.members[member_data_index]; - - mentry->current_head = group->num_snapshots; - - //now recover data. - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - mentry->timestamp[snapshot] = group->timestamps[snapshot]; - - __fenix_data_subset_free(mentry->data_regions+snapshot); - __fenix_data_subset_recv(mentry->data_regions+snapshot, group->partners[1], - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); - - int recv_size = __fenix_data_subset_data_size(mentry->data_regions + snapshot, - member_data.current_count); - - if(recv_size > 0){ - void* recv_buf = malloc(member_data.datatype_size * recv_size); - //first receive their data, so store in the resiliency section. - MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); - __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, - ((char*)mentry->data[snapshot]) + member_data.current_count*member_data.datatype_size, - member_data.current_count, member_data.datatype_size); - - //Now receive my data. - MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[1], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); - __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, - mentry->data[snapshot], member_data.current_count, member_data.datatype_size); - - free(recv_buf); - } - } - - //Member restored fully, so we don't need to mark it for undoing on failure. 
- Fenix_Callback_pop(); - free(undo_data); - } + return ss.str(); +} +int Group::member_create(fenix_member_entry_t* mentry){ + auto iter = member_data.try_emplace(mentry->memberid, nullptr); + if(iter.second){ + if(mode == 1) + iter.first->second = std::make_shared(*mentry, *this); + else if(mode == 5) + iter.first->second = std::make_shared(*mentry, *this); + else assert(false); + + return FENIX_SUCCESS; + } else return FENIX_ERROR_MEMBER_CREATE; +} - recovery_locally_possible = found_member || (my_data_found && partner_data_found); - if(recovery_locally_possible) retval = FENIX_SUCCESS; +int Group::member_delete(int member_id){ + auto iter = member_data.find(member_id); - } else if (group->raid_mode == 5){ - int* set_results = (int *) malloc(sizeof(int) * group->set_size); - MPI_Allgather((void*)&found_member, 1, MPI_INT, (void*)set_results, 1, MPI_INT, - group->set_comm); + if(iter == member_data.end()){ + debug_print("ERROR Fenix_Data_member_delete: member_id <%d> does not exist!\n", + member_id); + return FENIX_ERROR_INVALID_MEMBERID; + } - int recovering_node = -1, recovery_possible = 1; - for(int i = 0; i < group->set_size; i++){ - if(!set_results[i]){ - - if(recovering_node == -1){ - recovering_node = i; - } else { - recovery_possible = 0; - break; - } - - } - } + member_data.erase(iter); + return FENIX_SUCCESS; +} - free(set_results); - - //If we have a recovering node, and recovery is possible, do it - if((recovering_node != -1) && recovery_possible){ - int my_set_rank; - MPI_Comm_rank(group->set_comm, &my_set_rank); - - //The recovering node needs metadata on this member, just needs it from one partner. 
- if((recovering_node == 0 && my_set_rank == 1) || my_set_rank == 0){ - //I'm the node that's going to send metadata - - //This function pulls comm from the base group - so we need to give - //dest_rank in terms of that comm - __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[recovering_node]); - - //Now my partner will need all of the entries. First they'll need to know how many snapshots - //to expect. - MPI_Send((void*) &(group->num_snapshots), 1, MPI_INT, recovering_node, - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm); - - //They also need the timestamps for each snapshot, as well as the value for the next. - MPI_Send((void*)mentry->timestamp, group->num_snapshots+1, MPI_INT, recovering_node, - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - __fenix_data_subset_send(mentry->data_regions + snapshot, recovering_node, - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->set_comm); - } - - } else if(!found_member) { - //I'm the one that needs the info. - fenix_member_entry_packet_t packet; - __fenix_data_member_recv_metadata(group->base.groupid, group->partners[my_set_rank==0 ? 1 : 0], &packet); - - //We remake the new member just like the user would. - __fenix_member_create(group->base.groupid, member_id, NULL, packet.current_count, - packet.datatype_size); - - //Mark the member for deletion if another failure interrupts recovering fully. - undo_data = (fenix_imr_undo_log_t*)malloc(sizeof(fenix_imr_undo_log_t)); - undo_data->groupid = group->base.groupid; - undo_data->memberid = member_id; - Fenix_Callback_register(__imr_undo_restore, (void*)undo_data); - - - __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(&group->base, member_id); - member_data = group->base.members[member_data_index]; - - - MPI_Recv((void*)&(group->num_snapshots), 1, MPI_INT, (my_set_rank==0 ? 
1 : 0), - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm, NULL); - - mentry->current_head = group->num_snapshots; - - //We also need to explicitly ask for all timestamps, since user may have deleted some and caused mischief. - MPI_Recv((void*)(mentry->timestamp), group->num_snapshots + 1, MPI_INT, (my_set_rank==0 ? 1 : 0), - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm, NULL); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - __fenix_data_subset_free(mentry->data_regions+snapshot); - __fenix_data_subset_recv(mentry->data_regions+snapshot, (my_set_rank==0 ? 1 : 0), - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->set_comm); - } - } +int Group::member_store(int member_id, const DataSubset& subset){ + auto iter = member_data.find(member_id); + if(iter == member_data.end()){ + debug_print( + "ERROR Fenix_Data_member_store: %s unknown member_id %d on rank %d\n", + this->str().c_str(), member_id, current_rank + ); + return FENIX_ERROR_INVALID_MEMBERID; + } + return iter->second->store(subset); +} - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - //Similar to the process of doing a store, we're going to end up XORing with noisy data from - //the recovering node, then XORing with it again to get what we actually want. 
- int parity_size = (member_data.datatype_size*member_data.current_count)/(group->set_size-1); - int remainder = (member_data.datatype_size*member_data.current_count)%(group->set_size-1); - - if(remainder > 0) remainder++; - - void* data_buf = mentry->data[snapshot]; - void* parity_buf = (void*)((char*)data_buf + member_data.datatype_size*member_data.current_count + 2); - - int offset = 0; - for(int i = 0; i < group->set_size; i++){ - //Make sure to send the (out of order) parity info on the correct grouping - void* toSend; - if(i == my_set_rank){ - if(my_set_rank != recovering_node){ - toSend = parity_buf; - } else { - toSend = data_buf; - } - } else { - if(my_set_rank != recovering_node){ - toSend = (void*)((char*)data_buf + offset); - } else { - toSend = parity_buf; - } - } - - void* recv_buf = (i == my_set_rank ? parity_buf : (void*)((char*)data_buf + offset)); - - MPI_Reduce(toSend, recv_buf, parity_size + (1set_comm); - - if(my_set_rank == recovering_node){ - //Remove the random data I had to send from the result. - MPI_Reduce_local(toSend, recv_buf, parity_size + (istr().c_str(), member_id, current_rank + ); + return FENIX_ERROR_INVALID_MEMBERID; + } + return iter->second->storev(subset); +} - } +int Group::member_istore( + int member_id, const DataSubset& subset, Fenix_Request *request +) {return 0;} - retval = FENIX_SUCCESS; - recovery_locally_possible = 1; - } else if(!found_member){ - debug_print("ERROR Fenix_Data_member_restore: member_id <%d> does not exist at <%d> and is not recoverable from RAID-5 set\n", - member_id, group->base.current_rank); - - retval = FENIX_ERROR_INVALID_MEMBERID; - recovery_locally_possible = 0; - } else { - retval = FENIX_SUCCESS; - recovery_locally_possible = 1; - } +int Group::member_istorev( + int member_id, const DataSubset& subset_specifier, Fenix_Request *request +) {return 0;} +int Group::commit(){ + if(timestamps.size() == depth+1){ + //Full of timestamps, remove the oldest and proceed as normal. 
+ timestamps.pop_front(); + } + timestamps.push_back(timestamp); - } else { - debug_print("ERROR Fenix_Data_member_store: Raid mode <%d> is not supported yet!\n", - group->raid_mode); - retval = FENIX_ERROR_UNINITIALIZED; - recovery_locally_possible = 0; + for(auto& iter : member_data){ + iter.second->commit(timestamp); } - + send_buf.clear(); + send_buf.shrink_to_fit(); + recv_buf.clear(); + recv_buf.shrink_to_fit(); - //Now that we've ensured everyone has data, restore from it. - - int return_found_data; - if(data_found == NULL){ - data_found = (Fenix_Data_subset*) malloc(sizeof(Fenix_Data_subset)); - return_found_data = 0; - } else { - return_found_data = 1; - } - __fenix_data_subset_init(1, data_found); - - //Don't try to restore if we weren't able to get the relevant data. - if(recovery_locally_possible && target_buffer != NULL){ - data_found->specifier = __FENIX_SUBSET_EMPTY; - - int oldest_snapshot; - for(oldest_snapshot = (mentry->current_head - 1); oldest_snapshot >= 0; oldest_snapshot--){ - __fenix_data_subset_merge_inplace(data_found, mentry->data_regions + oldest_snapshot); - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - //The snapshots have formed a full set of data, not need to add older snapshots. - break; - } - } - - //If there isn't a full set of data, don't try to pull from nonexistent snapshot. 
- if(oldest_snapshot == -1){ - oldest_snapshot = 0; - } - - for(int i = oldest_snapshot; i < mentry->current_head; i++){ - __fenix_data_subset_copy_data(&mentry->data_regions[i], target_buffer, - mentry->data[i], member_data.datatype_size, member_data.current_count); - } + return FENIX_SUCCESS; +} - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - retval = FENIX_SUCCESS; - } else { - retval = FENIX_WARNING_PARTIAL_RESTORE; + +int Group::snapshot_delete(int to_delete){ + int retval = FENIX_SUCCESS; + + bool found = false; + for(auto it = timestamps.begin(); it != timestamps.end(); it++){ + if(*it == to_delete){ + timestamps.erase(it); + found = true; + break; } - } else { - data_found->specifier = __FENIX_SUBSET_EMPTY; } - - if(!return_found_data){ - __fenix_data_subset_free(data_found); - free(data_found); + for(auto& iter : member_data){ + found |= iter.second->snapshot_delete(to_delete); } + return found ? FENIX_SUCCESS : FENIX_ERROR_INVALID_TIMESTAMP; +} - //Dont forget to clear the commit buffer - if(recovery_locally_possible) mentry->data_regions[mentry->current_head].specifier = __FENIX_SUBSET_EMPTY; - +int Group::barrier(){return 0;} - return retval; +int Group::get_number_of_snapshots(int* num){ + *num = timestamps.size(); + return FENIX_SUCCESS; } -int __imr_member_lrestore(fenix_group_t* g, int member_id, - void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found){ - int retval = -1; +int Group::get_snapshot_at_position(int idx, int* snapshot){ + if(idx >= timestamps.size() || idx < 0) + return FENIX_ERROR_INVALID_POSITION; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - fenix_imr_mentry_t* mentry; - //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. 
- int found_member = !(__imr_find_mentry(group, member_id, &mentry)); + *snapshot = timestamps[idx]; + return FENIX_SUCCESS; +} + +int Group::member_restore( + int member_id, void* target_buffer, int max_count, int ts, + DataSubset& data_found +){ + //TODO: Is this fix needed anymore? + //One-time fix after a reinit. + if(timestamp == -1 && !timestamps.empty()) + timestamp = timestamps.back(); - if(!found_member){ - return FENIX_ERROR_INVALID_MEMBERID; - } + IMR::Member* member = find_member(member_id); - int member_data_index = __fenix_search_memberid(&group->base, member_id); - fenix_member_entry_t member_data = group->base.members[member_data_index]; - + std::vector found_members(set_size); + found_members[set_rank] = member ? 1 : 0; + int allgather_ret = MPI_Allgather( + MPI_IN_PLACE, 1, MPI_INT, found_members.data(), 1, MPI_INT, set_comm + ); - int return_found_data; - if(data_found == NULL){ - data_found = (Fenix_Data_subset*) malloc(sizeof(Fenix_Data_subset)); - return_found_data = 0; - } else { - return_found_data = 1; + int n_missing = 0; + int first_found = -1, missing_rank = -1; + for(int i = 0; i < found_members.size(); i++){ + if(!found_members[i]){ + n_missing++; + missing_rank = i; + } + if(found_members[i] && first_found == -1) + first_found = i; } - __fenix_data_subset_init(1, data_found); - data_found->specifier = __FENIX_SUBSET_EMPTY; + if(n_missing > 1){ + if(set_rank != 0) return FENIX_ERROR_INVALID_MEMBERID; - - int oldest_snapshot; - for(oldest_snapshot = (mentry->current_head - 1); oldest_snapshot >= 0; oldest_snapshot--){ - __fenix_data_subset_merge_inplace(data_found, mentry->data_regions + oldest_snapshot); - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - //The snapshots have formed a full set of data, not need to add older snapshots. 
- break; + if(n_missing == set_size){ + debug_print( + "ERROR Fenix_Data_member_restore: %s member_id %d not found\n", + this->str().c_str(), member_id + ); + } else { + debug_print( + "ERROR Fenix_Data_member_restore: %s member_id %d unrecoverable\n", + this->str().c_str(), member_id + ); + } + return FENIX_ERROR_INVALID_MEMBERID; + } else if(n_missing == 1){ + fenix_member_entry_packet_t packet; + if(set_rank == first_found) packet = member->mentry.to_packet(); + + MPI_Bcast( + &packet, sizeof(packet), MPI_BYTE, first_found, set_comm + ); + + if(!found_members[set_rank]){ + __fenix_member_create( + groupid, packet.memberid, target_buffer, packet.current_count, + packet.datatype_size + ); + member = find_member(member_id); + assert(member); } - } - - //If there isn't a full set of data, don't try to pull from nonexistent snapshot. - if(oldest_snapshot == -1){ - oldest_snapshot = 0; - } - - for(int i = oldest_snapshot; i < mentry->current_head; i++){ - __fenix_data_subset_copy_data(&mentry->data_regions[i], target_buffer, - mentry->data[i], member_data.datatype_size, member_data.current_count); - } - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - retval = FENIX_SUCCESS; - } else { - retval = FENIX_WARNING_PARTIAL_RESTORE; } - //Dont forget to clear the commit buffer - mentry->data_regions[mentry->current_head].specifier = __FENIX_SUBSET_EMPTY; + member->restore(); - return retval; + send_buf.clear(); + send_buf.shrink_to_fit(); + recv_buf.clear(); + recv_buf.shrink_to_fit(); + return member->lrestore((char*)target_buffer, max_count, ts, data_found); +} + +int Group::member_lrestore( + int member_id, void* target_buffer, int max_count, int ts, + DataSubset& data_found +){ + auto iter = member_data.find(member_id); + if(iter == member_data.end()) return FENIX_ERROR_INVALID_MEMBERID; + return iter->second->lrestore((char*)target_buffer, max_count, ts, data_found); } -int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, - 
void* target_buffer, int max_count, int time_stamp, +int Group::member_restore_from_rank(int member_id, + void* target_buffer, int max_count, int timestamp, int source_rank){return 0;} -int __imr_member_get_attribute(fenix_group_t* group, fenix_member_entry_t* member, +int Group::member_get_attribute(fenix_member_entry_t* member, int attributename, void* attributevalue, int* flag, int sourcerank){return 0;} -int __imr_member_set_attribute(fenix_group_t* g, fenix_member_entry_t* member, +int Group::member_set_attribute(fenix_member_entry_t* member, int attributename, void* attributevalue, int* flag){ //No mutable attributes (as of now) require any changes to this policy's info return FENIX_SUCCESS; } -int __imr_reinit(fenix_group_t* g, int* flag){ - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - if(group->raid_mode == 5){ - //Rebuild the set comm to re-include the failed node(s). - MPI_Group comm_group, set_group; - MPI_Comm_group(g->comm, &comm_group); - MPI_Group_incl(comm_group, group->set_size, group->partners, &set_group); - MPI_Comm_create_group(g->comm, set_group, 0, &(group->set_comm)); - } - - __imr_sync_timestamps(group); +int Group::reinit(int* flag){ + build_set_comm(); + sync_timestamps(); *flag = FENIX_SUCCESS; - - return FENIX_SUCCESS; + return *flag; } -void __imr_sync_timestamps(fenix_imr_group_t* group){ - int n_snapshots = group->num_snapshots; +void Group::sync_timestamps(){ + int n_snapshots = timestamps.size(); - if(group->raid_mode == 1){ - int partner_snapshots; - MPI_Sendrecv(&n_snapshots, 1, MPI_INT, group->partners[0], 34560, - &partner_snapshots, 1, MPI_INT, group->partners[1], 34560, - group->base.comm, MPI_STATUS_IGNORE); - n_snapshots = n_snapshots > partner_snapshots ? n_snapshots : partner_snapshots; - - MPI_Sendrecv(&n_snapshots, 1, MPI_INT, group->partners[1], 34561, - &partner_snapshots, 1, MPI_INT, group->partners[0], 34561, - group->base.comm, MPI_STATUS_IGNORE); - n_snapshots = n_snapshots > partner_snapshots ? 
n_snapshots : partner_snapshots; - } else { - MPI_Allreduce(MPI_IN_PLACE, &n_snapshots, 1, MPI_INT, MPI_MAX, group->set_comm); - } + MPI_Allreduce(MPI_IN_PLACE, &n_snapshots, 1, MPI_INT, MPI_MAX, set_comm); - bool need_reset = group->num_snapshots != n_snapshots; - for(int i = group->num_snapshots; i < n_snapshots; i++) group->timestamps[i] = -1; + bool need_reset = timestamps.size() != n_snapshots; + for(int i = timestamps.size(); i < n_snapshots; i++) timestamps.push_front(-1); - if(group->raid_mode == 1){ - int* p0_stamps = (int*)malloc(sizeof(int)*n_snapshots); - int* p1_stamps = (int*)malloc(sizeof(int)*n_snapshots); - - MPI_Sendrecv(group->timestamps, n_snapshots, MPI_INT, group->partners[1], 34562, - p0_stamps, n_snapshots, MPI_INT, group->partners[0], 34562, - group->base.comm, MPI_STATUS_IGNORE); - MPI_Sendrecv(group->timestamps, n_snapshots, MPI_INT, group->partners[0], 34563, - p1_stamps, n_snapshots, MPI_INT, group->partners[1], 34563, - group->base.comm, MPI_STATUS_IGNORE); - - for(int i = 0; i < n_snapshots; i++){ - int old_stamp = group->timestamps[i]; - group->timestamps[i] = group->timestamps[i] > p0_stamps[i] ? group->timestamps[i] : p0_stamps[i]; - group->timestamps[i] = group->timestamps[i] > p1_stamps[i] ? group->timestamps[i] : p1_stamps[i]; - - need_reset |= group->timestamps[i] != old_stamp; - } - - free(p0_stamps); - free(p1_stamps); - } else { - MPI_Allreduce(MPI_IN_PLACE, group->timestamps, n_snapshots, MPI_INT, MPI_MAX, group->set_comm); - } - - group->num_snapshots = n_snapshots; - if(n_snapshots > 0) group->base.timestamp = group->timestamps[n_snapshots-1]; - else group->base.timestamp = -1; - - //Now fix members - if(need_reset && group->entries_count > 0) { - if(fenix.options.verbose == 1){ - verbose_print("Outdated timestamps on rank %d. 
All members will require full recovery.\n", - group->base.current_rank); - } - //For now, just delete all members and assume partner(s) can - //help me rebuild fully consistent state - for(int i = group->entries_count-1; i >= 0; i--){ - int memberid = group->entries[i].memberid; - Fenix_Data_member_delete(group->base.groupid, memberid); - } - } + std::vector ts = {timestamps.begin(), timestamps.end()}; + MPI_Allreduce( + MPI_IN_PLACE, ts.data(), n_snapshots, MPI_INT, MPI_MAX, set_comm + ); + timestamps = {ts.begin(), ts.end()}; + + if(!timestamps.empty()) timestamp = timestamps.back(); + else timestamp = -1; } -int __imr_get_redundant_policy(fenix_group_t* group, int* policy_name, - void* policy_value, int* flag){ - int retval = FENIX_SUCCESS; +int Group::get_redundant_policy(int* policy_name, void* policy_value, int* flag){ *policy_name = FENIX_DATA_POLICY_IN_MEMORY_RAID; - fenix_imr_group_t* full_group = (fenix_imr_group_t *)group; int* policy_vals = (int*) policy_value; - policy_vals[0] = full_group->raid_mode; - policy_vals[1] = full_group->rank_separation; + policy_vals[0] = mode; + policy_vals[1] = rank_separation; + if(mode == 5) policy_vals[2] = set_size; *flag = FENIX_SUCCESS; - return retval; + return *flag; } -int __imr_group_delete(fenix_group_t* g){ - fenix_imr_group_t* group = (fenix_imr_group_t*) g; - - for(int entry = 0; entry < group->base.members.size(); entry++){ - __imr_member_free(group->entries+entry, g->depth); - } - free(group->entries); - - free(group->partners); - delete group; +int Group::group_delete(){ + delete this; return FENIX_SUCCESS; } +} // namespace Fenix::IMR diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index b28da79..0f8fb7d 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -61,10 +61,11 @@ #include "fenix_opt.hpp" #include "fenix_util.hpp" #include "fenix_ext.hpp" +#include "fenix_data_subset.hpp" #include -using namespace Fenix::Data; +namespace Fenix::Data { /** * @brief 
create new group or recover group data for lost processes @@ -155,7 +156,7 @@ int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, //Reinit group metadata as needed w/ new communicator. - group->vtbl.reinit(group, flag); + group->reinit(flag); } @@ -175,7 +176,7 @@ int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* poli retval = FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t* group = fenix.data_recovery->group[group_index]; - retval = group->vtbl.get_redundant_policy(group, policy_name, policy_value, flag); + retval = group->get_redundant_policy(policy_name, policy_value, flag); } return retval; @@ -212,7 +213,7 @@ int __fenix_member_create(int groupid, int memberid, void *data, int count, int mentry = __fenix_data_member_add_entry(group, memberid, data, count, datatype_size); //Pass the info along to the policy - return group->vtbl.member_create(group, mentry); + return group->member_create(mentry); } @@ -269,35 +270,24 @@ int __fenix_data_test(Fenix_Request request, int *flag) { * */ -int __fenix_member_store(int groupid, int memberid, Fenix_Data_subset specifier) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - /* Check if the member id already exists. 
If so, the index of the storage space is assigned */ - if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid ); - } - - if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { - verbose_print( - "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index, memberid); +int __fenix_member_store(int groupid, int memberid, const DataSubset& specifier) { + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; } + + return group->member_store(memberid, specifier); +} - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_store(group, memberid, specifier); +int __fenix_member_storev(int groupid, int memberid, const DataSubset& specifier) { + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_storev: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; } - return retval; + + return group->member_storev(memberid, specifier); } /** @@ -307,41 +297,18 @@ int __fenix_member_store(int groupid, int memberid, Fenix_Data_subset specifier) * @param subset_specifier * @param request */ -int __fenix_member_istore(int groupid, int memberid, Fenix_Data_subset specifier, +int __fenix_member_istore(int groupid, int memberid, const DataSubset& specifier, Fenix_Request *request) 
{ - - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - /* Check if the member id already exists. If so, the index of the storage space is assigned */ - if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid ); - } - - if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { - verbose_print( - "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index, memberid); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_istore(group, memberid, specifier, request); + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_istore: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; } - return retval; + + return group->member_istore(memberid, specifier, request); } - /** * @brief * @param group_id @@ -364,7 +331,7 @@ int __fenix_data_commit(int groupid, int *timestamp) { if (group->timestamp != -1) group->timestamp++; else group->timestamp = group->timestart; - group->vtbl.commit(group); + group->commit(); if (timestamp != NULL) { *timestamp = group->timestamp; @@ -413,7 +380,7 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { if(can_commit == 1){ if (group->timestamp != -1) group->timestamp++; else group->timestamp = group->timestart; - retval = group->vtbl.commit(group); + retval = group->commit(); } 
@@ -440,16 +407,13 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { * @param max_count * @param time_stamp */ -int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, Fenix_Data_subset* data_found) { - +int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { int retval = FENIX_SUCCESS; int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); - int member_index = -1; - - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); - if (fenix.options.verbose == 25) { + int member_index = -1; + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, member_index); @@ -461,7 +425,7 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, retval = FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_restore(group, memberid, data, maxcount, timestamp, data_found); + retval = group->member_restore(memberid, data, maxcount, timestamp, data_found); } return retval; } @@ -474,7 +438,7 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, * @param max_count * @param time_stamp */ -int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, Fenix_Data_subset* data_found) { +int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { int retval = FENIX_SUCCESS; int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); @@ -495,7 +459,7 @@ int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, retval = 
FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_lrestore(group, memberid, data, maxcount, timestamp, data_found); + retval = group->member_lrestore(memberid, data, maxcount, timestamp, data_found); } return retval; } @@ -529,7 +493,7 @@ int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buf retval = FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_restore_from_rank(group, memberid, target_buffer, + retval = group->member_restore_from_rank(memberid, target_buffer, max_count, time_stamp, source_rank); } return retval; @@ -560,14 +524,15 @@ int __fenix_get_member_at_position(int group_id, int *member_id, int position) { auto [group_index, group] = find_group(group_id); if(!group) return FENIX_ERROR_INVALID_GROUPID; - if(position < 0 || position > group->members.size()){ + if(position < 0 || position >= group->members.size()){ debug_print( "ERROR Fenix_Data_group_get_member_at_position: position <%d> must be a value between 0 and number_of_members-1 \n", position); return FENIX_ERROR_INVALID_POSITION; } - - *member_id = group->members[position].memberid; + auto iter = group->members.begin(); + std::advance(iter, position); + *member_id = iter->first; return FENIX_SUCCESS; } @@ -584,7 +549,7 @@ int __fenix_get_number_of_snapshots(int group_id, int *num_snapshots) { retval = FENIX_ERROR_INVALID_GROUPID; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.get_number_of_snapshots(group, num_snapshots); + retval = group->get_number_of_snapshots(num_snapshots); } return retval; } @@ -635,7 +600,7 @@ int __fenix_member_get_attribute(int groupid, int memberid, int attributename, member_index); } - return group->vtbl.member_get_attribute(group, mentry, attributename, + return group->member_get_attribute(mentry, attributename, 
attributevalue, flag, sourcerank); } @@ -667,12 +632,12 @@ int __fenix_member_set_attribute(int groupid, int memberid, int attributename, //Always pass attribute changes along to group - they might have unknown attributes //or side-effects to handle from changes. They get change info before //changes are made, in case they need prior state. - int retval = group->vtbl.member_set_attribute(group, mentry, attributename, + int retval = group->member_set_attribute(mentry, attributename, attributevalue, flag); switch (attributename) { case FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER: - mentry->user_data = attributevalue; + mentry->user_data = (char*)attributevalue; break; case FENIX_DATA_MEMBER_ATTRIBUTE_COUNT: mentry->current_count = *((int *) (attributevalue)); @@ -725,7 +690,9 @@ int __fenix_snapshot_delete(int group_id, int time_stamp) { retval = FENIX_ERROR_INVALID_TIMESTAMP; } else { fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.snapshot_delete(group, time_stamp); + retval = group->snapshot_delete(time_stamp); } return retval; } + +} // namespace Fenix::Data diff --git a/src/fenix_data_subset.cpp b/src/fenix_data_subset.cpp index 430fc1e..b88e223 100644 --- a/src/fenix_data_subset.cpp +++ b/src/fenix_data_subset.cpp @@ -59,752 +59,769 @@ #include "fenix_opt.hpp" #include "fenix_util.hpp" #include "fenix_data_subset.h" +#include "fenix_data_subset.hpp" +namespace Fenix { +namespace Detail { -int __fenix_data_subset_init(int num_blocks, Fenix_Data_subset* subset){ - int retval = -1; - if(num_blocks <= 0){ - debug_print("ERROR __fenix_data_subset_init: num_regions <%d> must be positive\n", - num_blocks); - } else { - subset->start_offsets = (int*) s_malloc(sizeof(int) * num_blocks); - subset->end_offsets = (int*) s_malloc(sizeof(int) * num_blocks); - subset->num_repeats = (int*) s_calloc(num_blocks, sizeof(int)); - subset->num_blocks = num_blocks; - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param 
num_blocks - * @param start_offset - * @param end_offset - * @param stride - * @param subset_specifier - * - * This routine creates - */ -int __fenix_data_subset_create(int num_blocks, int start_offset, int end_offset, int stride, - Fenix_Data_subset *subset_specifier) { - int retval = -1; - if (num_blocks <= 0) { - debug_print("ERROR Fenix_Data_subset_create: num_blocks <%d> must be positive\n", - num_blocks); - retval = FENIX_ERROR_SUBSET_NUM_BLOCKS; - } else if (start_offset < 0) { - debug_print("ERROR Fenix_Data_subset_create: start_offset <%d> must be positive\n", - start_offset); - retval = FENIX_ERROR_SUBSET_START_OFFSET; - } else if (end_offset < 0) { - debug_print("ERROR Fenix_Data_subset_create: end_offset <%d> must be positive\n", - end_offset); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } else if (stride <= 0) { - debug_print("ERROR Fenix_Data_subset_create: stride <%d> must be positive\n", stride); - retval = FENIX_ERROR_SUBSET_STRIDE; - } else { - //This is a simple subset with a single region descriptor that simply - //repeats num_blocks times. 
- __fenix_data_subset_init(1 /*Only 1 block, repeated*/, subset_specifier); - - subset_specifier->start_offsets[0] = start_offset; - subset_specifier->end_offsets[0] = end_offset; - subset_specifier->num_repeats[0] = num_blocks-1; - subset_specifier->stride = stride; - subset_specifier->specifier = __FENIX_SUBSET_CREATE; - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param num_blocks - * @param array_start_offsets - * @param array_end_offsets - * @param subset_specifier - */ -int __fenix_data_subset_createv(int num_blocks, int *array_start_offsets, int *array_end_offsets, - Fenix_Data_subset *subset_specifier) { - - int retval = -1; - if (num_blocks <= 0) { - debug_print("ERROR Fenix_Data_subset_createv: num_blocks <%d> must be positive\n", - num_blocks); - retval = FENIX_ERROR_SUBSET_NUM_BLOCKS; - } else if (array_start_offsets == NULL) { - debug_print( "ERROR Fenix_Data_subset_createv: array_start_offsets %s must be at least of size 1\n", ""); - retval = FENIX_ERROR_SUBSET_START_OFFSET; - } else if (array_end_offsets == NULL) { - debug_print( "ERROR Fenix_Data_subset_createv: array_end_offsets %s must at least of size 1\n", ""); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } else { - - // first check that the start offsets and end offsets are valid - int index; - int invalid_index = -1; - int found_invalid_index = 0; - for (index = 0; found_invalid_index != 1 && (index < num_blocks); index++) { - if (array_start_offsets[index] > array_end_offsets[index]) { - invalid_index = index; - found_invalid_index = 1; - } - } +std::pair DataRegion::range() const { + return {start, reps == MAX ? 
MAX : end+stride*reps}; +} - if (found_invalid_index != 1) { // if not true (!= 1) - __fenix_data_subset_init(num_blocks, subset_specifier); +size_t DataRegion::count() const { + if(end == MAX || reps == MAX) return MAX; + return (end-start+1)*(reps+1); +} - memcpy(subset_specifier->start_offsets, array_start_offsets, ( num_blocks * sizeof(int))); // deep copy - memcpy(subset_specifier->end_offsets, array_end_offsets, ( num_blocks * sizeof(int))); // deep copy - - subset_specifier->specifier = __FENIX_SUBSET_CREATEV; - subset_specifier->stride = 0; - retval = FENIX_SUCCESS; - } else { - debug_print( - "ERROR Fenix_Data_subset_createv: array_end_offsets[%d] must be less than array_start_offsets[%d]\n", - invalid_index, invalid_index); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } - } - return retval; -} - -//This should only be used to copy to a currently non-inited subset -// If the destination already has memory allocated in the num_blocks/offsets regions -// then this can lead to memory leaks. -// For the sake of consistent memory management, this will always return a subset with -// a valid memory allocation for each pointer in the subset. -void __fenix_data_subset_deep_copy(Fenix_Data_subset* from, Fenix_Data_subset* to){ - if(from->specifier == __FENIX_SUBSET_FULL || from->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_init(1, to); - to->specifier = from->specifier; - } else { - __fenix_data_subset_init(from->num_blocks, to); - memcpy(to->num_repeats, from->num_repeats, to->num_blocks*sizeof(int)); - memcpy(to->start_offsets, from->start_offsets, to->num_blocks*sizeof(int)); - memcpy(to->end_offsets, from->end_offsets, to->num_blocks*sizeof(int)); - to->specifier = from->specifier; - to->stride = from->stride; - } -} - -//This function checks for any overlapping regions and removes them. 
-void __fenix_data_subset_simplify_regions(Fenix_Data_subset* ss){ - int space_allocated = ss->num_blocks; - - if(ss->specifier == __FENIX_SUBSET_CREATE){ - //We will handle this by viewing the data as regions of size stride. - //Each block will be broken into a value dictating which regions it is - //within, and what data within each region it is within. - // - //If two blocks do not overlap within regions, there is no overlap. - //If they overlap within regions, but the regions they touch do not overlap, - //there is no overlap. etc. - - for(int i = 0; i < ss->num_blocks-1; i++){ - int did_merge = 0; - - for(int j = i+1; j < ss->num_blocks; j++){ - //We will simplify the logic by switching from i and j referencing - //to viewing the two blocks in the order that they exist in the data. - int first_block; - int second_block; - - if(ss->start_offsets[i] < ss->start_offsets[j]){ - first_block = i; - second_block = j; - } else { - first_block = j; - second_block = i; - } +bool DataRegion::operator==(const DataRegion& other) const { + return start == other.start && end == other.end && reps == other.reps && + stride == other.stride; +} - //Check for the case that the merged and unmerged regions are the same. - int merged_same_as_unmerged = ((ss->start_offsets[first_block]%ss->stride) == (ss->start_offsets[second_block]%ss->stride) - && (ss->end_offsets[first_block]%ss->stride) == (ss->end_offsets[second_block]%ss->stride)); - int merged_same_as_first = 0, merged_same_as_second = 0; +bool DataRegion::operator&&(const DataRegion& b) const { + const auto& a = *this; - - // We want the smallest x | (first_block_end + stride * x >= second_block_start) - // As this gives us which repetition an overlap is first possible on. - // Simplify to x >= (second_block_start - first_block_end)/s - // We want the lowest, so swap >= with =, and since we need an integer we'll round up. 
- int first_intersecting_repetition, option2; - if(ss->start_offsets[second_block] - ss->end_offsets[first_block] > 0){ - first_intersecting_repetition = (ss->start_offsets[second_block] - ss->end_offsets[first_block] - 1)/ss->stride + 1; - // = ceil( (ss->start_offsets[second_block] - ss->end_offsets[first_block]) / ss->stride) - } else { - first_intersecting_repetition = 0; - } - - // The above only accounts for one of two cases of intersection. There other is provided by option 2. - if(ss->end_offsets[second_block] - ss->end_offsets[first_block] > 0){ - option2 = (ss->end_offsets[second_block] - ss->end_offsets[first_block] - 1)/ss->stride + 1; - } else { - option2 = 0; - } + auto [astart, aend] = a.range(); + auto [bstart, bend] = b.range(); + if(astart > bend || bstart > aend) return false; + else if(!a.reps || !b.reps) return true; - if(merged_same_as_unmerged){ - //If there's no difference in merged/unmerged, we can 'skip' a stride - //and it'll al still be the same. - if(!( first_intersecting_repetition <= ss->num_repeats[first_block]+1 - || option2 <= ss->num_repeats[first_block]+1 )){ - //Both still require too high a repetition than we have. No overlap. - continue; - } - } else { - if(!( first_intersecting_repetition <= ss->num_repeats[first_block] - || option2 <= ss->num_repeats[first_block] )){ - //Both require too high a repetition than we have. No overlap. - continue; - } - } + //Both are strided. 
For now, only allow same-stride operations + fenix_assert(a.stride == b.stride); - merged_same_as_first = ss->start_offsets[first_block] + ss->stride*first_intersecting_repetition <= ss->start_offsets[second_block] - && ss->end_offsets[first_block] + ss->stride*first_intersecting_repetition >= ss->end_offsets[second_block]; - merged_same_as_second = ss->start_offsets[second_block] + ss->stride*first_intersecting_repetition <= ss->start_offsets[first_block] - && ss->end_offsets[second_block] + ss->stride*first_intersecting_repetition >= ss->end_offsets[first_block]; - - //We have found the smallest overlap candidate, now we see if there is overlap there. - int blocks_overlap; - if(first_intersecting_repetition < option2){ - blocks_overlap = ( (ss->stride * first_intersecting_repetition + ss->start_offsets[first_block]) <= ss->start_offsets[second_block]); - } else { - first_intersecting_repetition = option2; - blocks_overlap = ( (ss->stride * first_intersecting_repetition + ss->start_offsets[first_block]) <= ss->end_offsets[second_block]); - } + //Get a big modularly idempotent number to avoid negatives + size_t idem = (std::max(aend, bend)/stride + 1)*stride; - if(!blocks_overlap){ - continue; - } + //Start of possible overlap + size_t ostart = std::max(astart, bstart); + size_t arep = (ostart-astart)/stride; + size_t brep = (ostart-bstart)/stride; + DataRegion ablock = a.get_rep(arep); + DataRegion bblock = b.get_rep(brep); + + if(ablock && bblock) return true; + if(arep < a.reps && (bblock && a.get_rep(arep+1))) return true; + if(brep < b.reps && (ablock && b.get_rep(brep+1))) return true; + return false; +} - int length_first_only_start; - int length_first_only_end; - int length_both; - int length_second_only; - int merged_start; - int merged_end; - - - length_first_only_start = first_intersecting_repetition; - - length_first_only_start = length_first_only_start > (ss->num_repeats[i] + 1) ? 
- (ss->num_repeats[i] + 1) : length_first_only_start; - - int remaining_first_repetitions = ss->num_repeats[first_block] + 1 - length_first_only_start; - if(remaining_first_repetitions > ss->num_repeats[second_block]+1){ - length_both = ss->num_repeats[second_block] + 1; - length_second_only = 0; - length_first_only_end = ss->num_repeats[first_block]+1 - length_first_only_start - length_both; - } else { - length_both = remaining_first_repetitions; - length_second_only = ss->num_repeats[second_block]+1 - remaining_first_repetitions; - length_first_only_end = 0; - } - - if(merged_same_as_unmerged){ - length_both = length_both + length_first_only_end + length_first_only_start + length_second_only; - length_first_only_start = length_first_only_end = length_second_only = 0; - } else if(merged_same_as_first){ - length_both = length_both + length_first_only_end + length_first_only_start; +bool DataRegion::operator<(const DataRegion& other) const { + if(start != other.start) return start < other.start; + if(end != other.end) return end < other.end; + if(reps != other.reps) return reps < other.reps; + return stride < other.stride; +} - length_first_only_start = length_first_only_end = 0; - } else if(merged_same_as_second){ - length_both = length_both + length_both; +std::set DataRegion::operator&(const DataRegion& b) const { + fenix_assert(!reps || !b.reps); + if(b.reps) return b & *this; - length_both = 0; - } - - //Record info for merged region before we overwrite data we need. - merged_start = ss->stride*length_first_only_start + ss->start_offsets[first_block]; - merged_start = merged_start < ss->start_offsets[second_block] ? - merged_start : ss->start_offsets[second_block]; - - merged_end = ss->stride*length_first_only_start + ss->end_offsets[first_block]; - merged_end = merged_end < ss->end_offsets[second_block] ? 
- merged_end : ss->end_offsets[second_block]; + if(!(*this && b)){ + return {}; + } else if(!reps){ + return {DataRegion({std::max(start, b.start), std::min(end, b.end)})}; + } - //Now we know what the overlap is, so we make the changes to the data subset. - int store_index = 0; - int store_locations[3] = {first_block, second_block, ss->num_blocks}; + std::set ret; + + size_t start_rep = 0; + if(b.start > start){ + start_rep = (b.start-start)/stride; + auto block = get_rep(start_rep); + auto valid = block & b; + if(!valid.count(block)){ + ret.merge(valid); + if(start_rep == reps) return ret; + else start_rep++; + } + } - if(length_first_only_start > 0){ - ss->num_repeats[first_block] = length_first_only_start - 1; - store_index++; - } - if(length_first_only_end > 0){ - ss->num_repeats[store_locations[store_index]] = length_first_only_end-1; - ss->start_offsets[store_locations[store_index]] = ss->stride*(length_first_only_start+length_both) + - ss->start_offsets[first_block]; - ss->end_offsets[store_locations[store_index]] = ss->stride*(length_first_only_start+length_both) + - ss->end_offsets[first_block]; - store_index++; - } else if(length_second_only > 0){ - ss->num_repeats[store_locations[store_index]] = length_second_only-1; - ss->start_offsets[store_locations[store_index]] = ss->stride*(length_both) + - ss->start_offsets[second_block]; - ss->end_offsets[store_locations[store_index]] = ss->stride*(length_both) + - ss->end_offsets[second_block]; - store_index++; - } + size_t end_rep = reps; + if(b.end < range().second){ + end_rep = (b.end-start)/stride; + if(end_rep < start_rep) return ret; + + auto block = get_rep(end_rep); + auto valid = block & b; + if(!valid.count(block)){ + ret.merge(valid); + if(end_rep == start_rep) return ret; + else end_rep--; + } + } - //There is always a merged region to add. - if(store_index == 2){ - //We're adding a new block, so we need to make sure we have allocated - //enough memory space. 
- ss->num_blocks++; - if(ss->num_blocks > space_allocated){ - - ss->end_offsets = (int*) s_realloc(ss->end_offsets, - (space_allocated * 2) * sizeof(int)); - ss->start_offsets = (int*) s_realloc(ss->start_offsets, - (space_allocated * 2) * sizeof(int)); - ss->num_repeats = (int*) s_realloc(ss->num_repeats, - (space_allocated * 2) * sizeof(int)); - space_allocated *= 2; - } + ret.insert(get_reps(start_rep, end_rep)); + return ret; +} - } - ss->start_offsets[store_locations[store_index]] = merged_start; - ss->end_offsets[store_locations[store_index]] = merged_end; - ss->num_repeats[store_locations[store_index]] = length_both - 1; - store_index++; - - - //Check if num_repeats[second_block] < 0, if so remove it. - //This could occur if both blocks can be perfectly minimized to a single block. - if(store_index == 1){ - if(second_block == ss->num_blocks-1){ - //Don't need to move anything. - ss->num_blocks--; - } else { - //We need to move everything over by one. - memmove(ss->num_repeats + second_block, ss->num_repeats + second_block + 1, - ss->num_blocks - second_block - 1); - memmove(ss->start_offsets + second_block, ss->start_offsets + second_block + 1, - ss->num_blocks - second_block - 1); - memmove(ss->end_offsets + second_block, ss->end_offsets + second_block + 1, - ss->num_blocks - second_block - 1); - ss->num_blocks--; - } - } - - did_merge = 1; - } +DataRegion DataRegion::get_rep(size_t n) const { + fenix_assert(n <= reps); + return DataRegion({start+n*stride, end+n*stride}); +} - //If we merged w/ anything, recheck w/ new merged block. - if(did_merge) i--; - } - } else if(ss->specifier == __FENIX_SUBSET_CREATEV){ - //This is much simpler than with CREATE type, since we don't have to - //worry about repetition. 
- for(int i = 0; i < ss->num_blocks-1; i++){ - int did_merge = 0; - - for(int j = i+1; j < ss->num_blocks; j++){ - if( ss->start_offsets[i] <= ss->end_offsets[j]+1 && - ss->end_offsets[i] >= ss->start_offsets[j]-1){ - did_merge = 1; - - ss->start_offsets[i] = (ss->start_offsets[i] < ss->start_offsets[j]) ? - ss->start_offsets[i] : - ss->start_offsets[j]; - - ss->end_offsets[i] = (ss->end_offsets[i] > ss->end_offsets[j]) ? - ss->end_offsets[i] : - ss->end_offsets[j]; - - //Move everything over to remove j - memmove(ss->start_offsets + j, ss->start_offsets + j + 1, - (ss->num_blocks - j - 1) * sizeof(int)); - memmove(ss->end_offsets + j, ss->end_offsets + j + 1, - (ss->num_blocks - j - 1) * sizeof(int)); - ss->num_blocks--; - } - } +DataRegion DataRegion::get_reps(size_t first, size_t last) const { + fenix_assert(first <= last); + fenix_assert(last <= reps); + return DataRegion( + {start+first*stride, end+first*stride}, last-first, stride + ); +} - if(did_merge) i--; - } - } +DataRegion DataRegion::inverted() const { + fenix_assert(reps); + return DataRegion({end+1, start+stride-1}, reps == MAX ? MAX : reps-1, stride); +} - if(space_allocated > ss->num_blocks){ - ss->end_offsets = (int*) s_realloc(ss->end_offsets, - ss->num_blocks * sizeof(int)); - ss->start_offsets = (int*) s_realloc(ss->start_offsets, - ss->num_blocks * sizeof(int)); - ss->num_repeats = (int*) s_realloc(ss->num_repeats, - ss->num_blocks * sizeof(int)); +std::optional DataRegion::try_merge(const DataRegion& b) const { + if(b < *this){ + return b.try_merge(*this); + } else if(range().second == MAX){ + return {}; + } else if(!reps && !b.reps){ + if(end+1 == b.start) return DataRegion({start, b.end}); + else return {}; + } else { + size_t s = reps ? 
stride : b.stride; + if(start+s*(reps+1) == b.start && end+s*(reps+1) == b.end) + return DataRegion({start, end}, reps+b.reps+1, s); + else return {}; } +} +void merge_adjacent_sets(std::set& a, std::set& b){ + if(a.empty()){ + a = std::move(b); + b = {}; + } + while(!b.empty()){ + auto merged = (--a.end())->try_merge(*b.begin()); + if(!merged){ + a.merge(b); + b.clear(); + } else { + a.erase(a.end()--); + b.erase(b.begin()); + a.insert(*merged); + } + } + while(a.size() > 1){ + auto merged = (----a.end())->try_merge(*(--a.end())); + if(!merged) break; + a.erase(a.end()--); + a.erase(a.end()--); + a.insert(*merged); + } } -//This should only be used to copy to a currently non-inited subset -// If the destination already has memory allocated in the num_blocks/offsets regions -// then this can lead to double-mallocs or memory leaks. -void __fenix_data_subset_merge(Fenix_Data_subset* first_subset, Fenix_Data_subset* second_subset, - Fenix_Data_subset* output){ - - //Simple cases first - if(first_subset->specifier == __FENIX_SUBSET_FULL || - second_subset->specifier == __FENIX_SUBSET_FULL){ - //We don't need to populate anything else. - output->specifier = __FENIX_SUBSET_FULL; - //We still have to init, else there will be a memory error when the user tries to free later. - __fenix_data_subset_init(1, output); - - } else if(first_subset->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_deep_copy(second_subset, output); - - } else if(second_subset->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_deep_copy(first_subset, output); - - } else if(first_subset->specifier == __FENIX_SUBSET_CREATE && - second_subset->specifier == __FENIX_SUBSET_CREATE && - first_subset->stride == second_subset->stride){ - //Output is just a CREATE type with combined descriptors. - //Start by making a list of all descriptors, then merge any with overlaps. 
- output->stride = first_subset->stride; - output->num_blocks = first_subset->num_blocks - + second_subset->num_blocks; - __fenix_data_subset_init(output->num_blocks, output); - output->specifier = __FENIX_SUBSET_CREATE; - - memcpy(output->num_repeats, first_subset->num_repeats, first_subset->num_blocks * sizeof(int)); - memcpy(output->num_repeats+first_subset->num_blocks, second_subset->num_repeats, - second_subset->num_blocks * sizeof(int)); +std::set DataRegion::operator-(const DataRegion& b) const { + if(!(*this && b)) return {*this}; + fenix_assert(!reps || !b.reps || stride == b.stride); - memcpy(output->start_offsets, first_subset->start_offsets, first_subset->num_blocks * sizeof(int)); - memcpy(output->start_offsets+first_subset->num_blocks, second_subset->start_offsets, - second_subset->num_blocks * sizeof(int)); - - memcpy(output->end_offsets, first_subset->end_offsets, first_subset->num_blocks * sizeof(int)); - memcpy(output->end_offsets+first_subset->num_blocks, second_subset->end_offsets, - second_subset->num_blocks * sizeof(int)); - - //Now we have all of the regions, so we just need to simplify them. 
- __fenix_data_subset_simplify_regions(output); - } else { - output->specifier = __FENIX_SUBSET_CREATEV; + const auto [bstart, bend] = b.range(); + + std::set ret; + if(bstart > 0) ret.merge(*this & DataRegion({0, bstart-1})); + if(b.reps){ + std::set mid; + auto inv = b.inverted(); - output->num_blocks = first_subset->num_blocks + second_subset->num_blocks; - if(first_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < first_subset->num_blocks; i++){ - output->num_blocks += first_subset->num_repeats[i]; + //Get just the portions of myself that could be overlapping + auto parts = *this & DataRegion({bstart, bend}); + + //Handle any non-strided portions individually + for(auto it = parts.begin(); it != parts.end();){ + if(it->reps){ + it++; + continue; + } else { + mid.merge(*it & inv); + it = parts.erase(it); } } - if(second_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < second_subset->num_blocks; i++){ - output->num_blocks += second_subset->num_repeats[i]; + + //Should have taken care of everything, or left only 1 strided region + if(!parts.empty()){ + fenix_assert(parts.size() == 1); + auto a = *parts.begin(); + //First, middle, and last repetitions may be effected differently + mid.merge(a.get_rep(0) & inv); + if(a.reps > 1){ + size_t middle_reps = a.reps-2; + auto middle_valid = a.get_rep(1) & inv; + for(auto block : middle_valid){ + fenix_assert(!block.reps); + mid.insert(DataRegion( + {block.start, block.end}, a.reps-2, b.stride + )); + } } + mid.merge(a.get_rep(a.reps) & inv); } + merge_adjacent_sets(ret, mid); + } + if(bend != MAX){ + auto post = *this & DataRegion({bend+1, MAX}); + merge_adjacent_sets(ret, post); + } + return ret; +} - __fenix_data_subset_init(output->num_blocks, output); +std::string DataRegion::str() const { + std::string ret = "["; - int index = 0; - for(int i = 0; i < first_subset->num_blocks; i++){ - for(int j = 0; j <= first_subset->num_repeats[i]; j++){ - output->start_offsets[index] = 
j*first_subset->stride + first_subset->start_offsets[i]; - output->end_offsets[index] = j*first_subset->stride + first_subset->end_offsets[i]; - index++; - } - } - for(int i = 0; i < second_subset->num_blocks; i++){ - for(int j = 0; j <= second_subset->num_repeats[i]; j++){ - output->start_offsets[index] = j*second_subset->stride + second_subset->start_offsets[i]; - output->end_offsets[index] = j*second_subset->stride + second_subset->end_offsets[i]; - index++; - } - } + if(start == MAX) ret += "MAX"; + else ret += std::to_string(start); + + ret += ","; + + if(end == MAX) ret += "MAX"; + else ret += std::to_string(end); + + ret += "]"; - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(output); + if(reps){ + ret += "x" + std::to_string(reps+1) + "s" + std::to_string(stride); } + return ret; } -//Merge second subset into first subset -//This reasonably assumes both subsets are already initialized. -void __fenix_data_subset_merge_inplace(Fenix_Data_subset* first_subset, Fenix_Data_subset* second_subset){ - - //Simple cases first - if(first_subset->specifier == __FENIX_SUBSET_FULL || - second_subset->specifier == __FENIX_SUBSET_FULL){ - //We don't need to populate anything else. - first_subset->specifier = __FENIX_SUBSET_FULL; - - } else if(second_subset->specifier == __FENIX_SUBSET_EMPTY){ - //Do nothing. - - } else if(first_subset->specifier == __FENIX_SUBSET_EMPTY){ - //Deep copy requires that the destination be non-initialized, so free sub1 first. - __fenix_data_subset_free(first_subset); - __fenix_data_subset_deep_copy(second_subset, first_subset); - - } else if(first_subset->specifier == __FENIX_SUBSET_CREATE && - second_subset->specifier == __FENIX_SUBSET_CREATE && - first_subset->stride == second_subset->stride){ - //Output is just a CREATE type with combined descriptors. - //Start by making a list of all descriptors, then merge any with overlaps. 
- first_subset->num_repeats = (int*)s_realloc(first_subset->num_repeats, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - first_subset->start_offsets = (int*)s_realloc(first_subset->start_offsets, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - first_subset->end_offsets = (int*)s_realloc(first_subset->end_offsets, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - - memcpy(first_subset->num_repeats+first_subset->num_blocks, second_subset->num_repeats, - second_subset->num_blocks * sizeof(int)); - - memcpy(first_subset->start_offsets+first_subset->num_blocks, second_subset->start_offsets, - second_subset->num_blocks * sizeof(int)); - - memcpy(first_subset->end_offsets+first_subset->num_blocks, second_subset->end_offsets, - second_subset->num_blocks * sizeof(int)); - - first_subset->num_blocks = first_subset->num_blocks - + second_subset->num_blocks; +DataRegion full_region({0, DataRegion::MAX}); + +struct BlockIter { + using set_t = std::set; + using iter_t = typename set_t::iterator; + + BlockIter(set_t&& m_regions) + : region_holder(std::make_shared(std::move(m_regions))), + regions(*region_holder), it(regions.begin()), rep(0) { } + BlockIter(std::shared_ptr m_regions) + : region_holder(m_regions), + regions(*region_holder), it(regions.begin()), rep(0) { } + BlockIter(const BlockIter& other) + : region_holder(other.region_holder), regions(other.regions), it(other.it), rep(other.rep) { + fenix_assert(rep <= it->reps); + ++*this; + } + + DataRegion operator*(){ + return it->get_rep(rep); + } + + bool operator==(const BlockIter& other) const { + return it == other.it && rep == other.rep; + } + bool operator!=(const BlockIter& other) const { + return !(*this == other); + } + + BlockIter begin(){ + return BlockIter(region_holder); + } + + BlockIter end(){ + auto ret = begin(); + ret.it = regions.end(); + return ret; + } + + BlockIter& operator++(){ + if(rep == it->reps){ + ++it; + rep = 0; + } else 
{ + ++rep; + } + return *this; + } - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(first_subset); + std::shared_ptr region_holder; + + const set_t& regions; + iter_t it; + size_t rep; + +}; + +} // namespace Detail + +using namespace Detail; + +DataSubset::DataSubset(size_t end) : DataSubset({0, end}) { } + +DataSubset::DataSubset(std::pair bounds) + : DataSubset(bounds, 1, DataRegion::MAX) { }; + +DataSubset::DataSubset( + std::pair bounds, size_t n, size_t stride +) { + fenix_assert(bounds.first <= bounds.second, + "subset start (%lu) cannot be after end (%lu)", + bounds.first, bounds.second + ); + fenix_assert(n > 0, "num_blocks (%lu) must be positive", n); + fenix_assert(n == 1 || bounds.first+stride > bounds.second, + "stride %lu too low for region [%lu, %lu]", + stride, bounds.first, bounds.second + ); + + regions.emplace(bounds, n-1, stride); +} + +DataSubset::DataSubset(std::vector> bounds){ + for(const auto& b : bounds){ + fenix_assert(b.first <= b.second, + "subset start (%lu) cannot be after end (%lu)", b.first, b.second + ); + regions.emplace(b); + } + + //Simplify regions + merge_regions(); +} + +DataSubset::DataSubset(const DataSubset& a, const DataSubset& b){ + if(a.empty() || b.empty()){ + *this = a.empty() ? 
b : a; + return; + } + if(a.regions.count(full_region) || b.regions.count(full_region)){ + regions.insert(full_region); + return; + } + + size_t a_stride = 0, b_stride = 0; + for(const auto& r : a.regions) if(r.reps) a_stride = r.stride; + for(const auto& r : b.regions) if(r.reps) b_stride = r.stride; + + if(a_stride && b_stride && a_stride != b_stride){ + //De-stride A's regions + for(const auto& r : a.regions){ + fenix_assert(r.reps != MAX); + for(int i = 0; i <= r.reps; i++) + regions.insert(r.get_rep(i)); + } } else { - - int new_num_blocks = first_subset->num_blocks + second_subset->num_blocks; - if(first_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < first_subset->num_blocks; i++){ - new_num_blocks += first_subset->num_repeats[i]; + regions = a.regions; + } + + for(const auto& br : b.regions){ + const auto [bstart, bend] = br.range(); + std::set adding = {br}; + for(const auto& r : regions){ + const auto [rstart, rend] = r.range(); + if(rstart > bend) break; + if(rend < bstart) continue; + for(auto it = adding.begin(); it != adding.end();){ + auto valid = *it - r; + if(valid.size() == 1 && valid.count(*it)){ + it++; + } else { + it = adding.erase(it); + adding.merge(valid); + } } } - if(second_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < second_subset->num_blocks; i++){ - new_num_blocks += second_subset->num_repeats[i]; + regions.merge(adding); + } + + //Final attempt to simplify all regions + merge_regions(); +} + +void DataSubset::merge_regions() { + auto check = regions.begin(); + while(check != regions.end()){ + auto crange = check->range(); + + bool erase = false; + for(auto i = std::next(check); i != regions.end(); i++){ + if(crange.second < i->start-1) break; + + size_t merge_idx = -1; + size_t merge_reps = -1; + + size_t matching_end = i->start-1; + size_t matching_start = i->end+1; + if((matching_end - check->end)%check->stride == 0){ + merge_idx = (matching_end-check->end)/check->stride; + merge_reps = 
std::min(check->reps-merge_idx, i->reps); + + //Add the merged region + regions.insert(DataRegion( + {check->start+merge_idx*check->stride, i->end}, + merge_reps, check->stride + )); + } else if((matching_start - check->start)%check->stride == 0){ + merge_idx = (matching_start-check->start)/check->stride; + merge_reps = std::min(check->reps-merge_idx, i->reps); + + //Add the merged region + regions.insert(DataRegion( + {i->start, check->end+merge_idx*check->stride}, + merge_reps, check->stride + )); } - } - first_subset->num_repeats = (int*)s_realloc(first_subset->num_repeats, new_num_blocks*sizeof(int)); - first_subset->start_offsets = (int*)s_realloc(first_subset->start_offsets, new_num_blocks*sizeof(int)); - first_subset->end_offsets = (int*)s_realloc(first_subset->end_offsets, new_num_blocks*sizeof(int)); - - //work backwards to prevent overwriting current data. - - int index = new_num_blocks-1; - for(int i = second_subset->num_blocks-1; i >= 0; i--){ - for(int j = 0; j <= second_subset->num_repeats[i]; j++){ - first_subset->start_offsets[index] = j*second_subset->stride - + second_subset->start_offsets[i]; - first_subset->end_offsets[index] = j*second_subset->stride - + second_subset->end_offsets[i]; - first_subset->num_repeats[index] = 0; - index--; + if(merge_idx != -1){ + erase = true; + + //Add any blocks before the merge + if(merge_idx > 0){ + regions.insert(DataRegion( + {check->start, check->end}, merge_idx-1, check->stride + )); + } + //Add any blocks after the merge + if(merge_idx+merge_reps < check->reps){ + size_t n_pre = merge_idx+merge_reps+1; + size_t offset = n_pre*check->stride; + regions.insert(DataRegion( + {check->start+offset, check->end+offset}, + check->reps-n_pre, check->stride + )); + } else if(merge_reps < i->reps){ + size_t n_pre = merge_reps+1; + size_t offset = n_pre*i->stride; + regions.insert(DataRegion( + {i->start+offset, i->end+offset}, + i->reps-n_pre, i->stride + )); + } + + regions.erase(i); + break; } } - for(int i = 
first_subset->num_blocks-1; i >= 0; i--){ - for(int j = 0; j <= first_subset->num_repeats[i]; j++){ - first_subset->start_offsets[index] = j*first_subset->stride - + first_subset->start_offsets[i]; - first_subset->end_offsets[index] = j*first_subset->stride - + first_subset->end_offsets[i]; - first_subset->num_repeats[index] = 0; - index--; - } + + auto it = check++; + if(erase){ + regions.erase(it); } - first_subset->specifier = __FENIX_SUBSET_CREATEV; - first_subset->num_blocks = new_num_blocks; + } +} - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(first_subset); +DataSubset::DataSubset(const DataBuffer& buf){ + fenix_assert(buf.size()%sizeof(DataRegion) == 0); + + size_t n_regions = buf.size()/sizeof(DataRegion); + if(n_regions == 0) return; + + DataRegion* r = (DataRegion*) buf.data(); + for(int i = 0; i < n_regions; i++){ + regions.insert(*(r++)); } +} + +void DataSubset::serialize(DataBuffer& buf) const { + buf.reset(regions.size()*sizeof(DataRegion)); + DataRegion* r = (DataRegion*) buf.data(); + for(const auto& region : regions){ + *(r++) = region; + } +} + +DataSubset DataSubset::operator+(const DataSubset& other) const { + return DataSubset(*this, other); +} + +DataSubset DataSubset::operator+(const Fenix_Data_subset& other) const { + return *this + *(DataSubset*)other.impl; +} +DataSubset& DataSubset::operator+=(const DataSubset& other) { + *this = *this + other; + return *this; } +DataSubset& DataSubset::operator+=(const Fenix_Data_subset& other) { + return *this += *(DataSubset*)other.impl; +} + +DataSubset DataSubset::operator-(const DataSubset& other) const { + if(empty() || other.empty()) return *this; + + size_t a_stride = 0, b_stride = 0; + for(const auto& r : regions) if(r.reps) a_stride = r.stride; + for(const auto& r : other.regions) if(r.reps) b_stride = r.stride; + + fenix_assert(!( + a_stride && b_stride && a_stride != b_stride && + end() == MAX && other.end() == MAX + )); -void 
__fenix_data_subset_copy_data(Fenix_Data_subset* ss, void* dest, void* src, size_t data_type_size, size_t max_size){ - if(ss->specifier == __FENIX_SUBSET_FULL){ - memcpy(dest, src, max_size*data_type_size); - } else if(ss->specifier != __FENIX_SUBSET_EMPTY){ - for(int i = 0; i < ss->num_blocks; i++){ - //Inclusive both directions, so add 1. - int length = ss->end_offsets[i]-ss->start_offsets[i] + 1; - - for(int j = 0; j <= ss->num_repeats[i]; j++){ - int start = ss->start_offsets[i] + j*ss->stride; - memcpy( ((uint8_t*)dest) + start*data_type_size, ((uint8_t*)src) + start*data_type_size, length*data_type_size); + std::set remaining; + if(a_stride && b_stride && a_stride != b_stride){ + size_t b_end = other.end(); + + //Insert individual repetitions possibly overlapping regions + for(const auto& b : BlockIter(bounded_regions(0, b_end))){ + remaining.insert(b); + } + if(b_end != MAX){ + //Leave non-overlapping regions strided + auto br = bounded_regions(b_end+1, MAX); + for(const auto& b : br){ + remaining.insert(b); } } + } else { + remaining = regions; } -} -int __fenix_data_subset_data_size(Fenix_Data_subset* ss, size_t max_size){ - int size; + for(const auto& b_r : other.regions){ + auto next = remaining.begin(); + while(next != remaining.end()){ + auto it = next++; - if(ss->specifier == __FENIX_SUBSET_FULL){ - size = max_size; - } else if( ss->specifier == __FENIX_SUBSET_EMPTY){ - size = 0; - } else { - size = 0; - for(int i = 0; i < ss->num_blocks; i++){ - size += (ss->end_offsets[i] - ss->start_offsets[i] + 1)*(ss->num_repeats[i]+1); + if(b_r.start > it->range().second) continue; + if(it->start > b_r.range().second) break; + + auto r = *it; + remaining.erase(it); + remaining.merge(r - b_r); } } - return size; + DataSubset c; + c.regions = std::move(remaining); + c.merge_regions(); + return c; } -int __fenix_data_subset_is_full(Fenix_Data_subset *ss, size_t data_length){ - //Assumes a "simplified" subset which has all mergeable regions merged. 
- return (ss->specifier == __FENIX_SUBSET_FULL) || - ( (ss->start_offsets[0] == 0) && (ss->end_offsets[0] == data_length-1) ); -} +bool DataSubset::operator==(const DataSubset& other) const { + //Making checks in approximate order of cost + if(start() != other.start()) return false; + if(regions.empty() != other.regions.empty()) return false; + if(regions == other.regions) return true; + if(end() != other.end()) return false; -//Makes an array with the in-order contents of subset ss of src. -//size is updated to the size of the serialized array, which is returned as the function's return. -//User's responsibility to free the returned array. -void* __fenix_data_subset_serialize(Fenix_Data_subset* ss, void* src, size_t type_size, size_t max_size, size_t* size){ - - void* dest; - - if(ss->specifier == __FENIX_SUBSET_FULL){ - dest = malloc(type_size*max_size); + size_t a_stride = 0, b_stride = 0; + for(const auto& r : regions) if(r.reps) a_stride = r.stride; + for(const auto& r : other.regions) if(r.reps) b_stride = r.stride; - memcpy(dest, src, type_size*max_size); + //We won't try comparing infinite regions of different strides. + if(a_stride && b_stride && a_stride != b_stride && end() == MAX) + return false; - *size = max_size; + return (*this - other).empty() && (other - *this).empty(); +} - } else if(ss->specifier == __FENIX_SUBSET_EMPTY) { +bool DataSubset::operator!=(const DataSubset& other) const { + return !(*this == other); +} - dest = NULL; - size = 0; +bool DataSubset::empty() const { + return regions.empty(); +} - } else { - //First, count up the number of entries to find a size. - *size = __fenix_data_subset_data_size(ss, max_size); +std::pair DataSubset::range() const { + return {start(), end()}; +} - dest = malloc(type_size * (*size)); - - int* current_repetition = (int*) s_calloc(ss->num_blocks, sizeof(int)); - //We need to be sure to go in the right order. 
- int stored = 0; - while(stored < *size){ - int lowest_index = -1; - int lowest_block = -1; - for(int i = 0; i < ss->num_blocks; i++){ - if(current_repetition[i] <= ss->num_repeats[i]){ - if(lowest_index == -1 || - (lowest_index > ss->start_offsets[i]+ss->stride*current_repetition[i])){ - lowest_index = ss->start_offsets[i] + ss->stride*current_repetition[i]; - lowest_block = i; - } - } - } - - memcpy(((uint8_t*)dest)+stored*type_size, ((uint8_t*)src)+lowest_index*type_size, - type_size*(ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1) ); - stored += ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1; - current_repetition[lowest_block]++; - } +size_t DataSubset::start() const { + if(regions.empty()) return -1; + return regions.begin()->start; +} - free(current_repetition); +size_t DataSubset::end() const { + if(empty()) return -1; + size_t ret = 0; + for(const auto& r : regions){ + ret = std::max(ret, r.range().second); } + return ret; +} - - return dest; +std::set DataSubset::bounded_regions(size_t max_idx) const { + return bounded_regions(0, max_idx); } -void __fenix_data_subset_deserialize(Fenix_Data_subset* ss, void* src, void* dest, size_t max_size, size_t type_size){ - if(ss->specifier == __FENIX_SUBSET_FULL){ - memcpy(dest, src, type_size*max_size); - - } else if(ss->specifier != __FENIX_SUBSET_EMPTY){ - //First, count up the number of entries to find a size. - int size = __fenix_data_subset_data_size(ss, max_size); - - int* current_repetition = (int*) s_calloc(ss->num_blocks, sizeof(int)); - //We need to be sure to go in the right order. 
- int restored = 0; - while(restored < size){ - int lowest_index = -1; - int lowest_block = -1; - for(int i = 0; i < ss->num_blocks; i++){ - if(current_repetition[i] <= ss->num_repeats[i]){ - if(lowest_index == -1 || - (lowest_index > ss->start_offsets[i]+ss->stride*current_repetition[i])){ - lowest_index = ss->start_offsets[i] + ss->stride*current_repetition[i]; - lowest_block = i; - } - } - } - - memcpy(((uint8_t*)dest)+lowest_index*type_size, ((uint8_t*)src)+restored*type_size, - type_size*(ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1) ); - restored += ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1; - current_repetition[lowest_block]++; - } +std::set DataSubset::bounded_regions( + size_t start, size_t end +) const { + std::set ret; + DataRegion bounds({start, end}); + for(const auto& r : regions){ + if(r.start > end) break; + if(r.range().second < start) continue; + ret.merge(r & bounds); + } + return ret; +} - free(current_repetition); +size_t DataSubset::count(size_t max_index) const { + size_t ret = 0; + for(const auto& r : bounded_regions(max_index)){ + size_t c = r.count(); + if(c == MAX) return 0; + ret+= c; } + return ret; +} +size_t DataSubset::max_count() const { + return end()+1; } -void __fenix_data_subset_send(Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm){ - int* toSend = (int*)malloc(sizeof(int) * (3 + 3*ss->num_blocks)); - toSend[0] = ss->num_blocks; +void DataSubset::serialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst +) const { + if(regions.empty()){ + dst.resize(0); + return; + } + fenix_assert(src.size()%elm_size == 0); + const size_t max_elm = src.size()/elm_size-1; + + dst.reset(count(max_elm) * elm_size); + char* ptr = dst.data(); - for(int i = 0; i < ss->num_blocks; i++){ - toSend[1+3*i] = ss->start_offsets[i]; - toSend[2+3*i] = ss->end_offsets[i]; - toSend[3+3*i] = ss->num_repeats[i]; + for(const auto b : BlockIter(bounded_regions(max_elm))){ + size_t start = 
b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert((ptr+len)-dst.data() <= dst.size()); + fenix_assert(start+len <= src.size()); + + memcpy(ptr, src.data()+start, len); + ptr += len; } - toSend[1+3*ss->num_blocks] = ss->stride; - toSend[2+3*ss->num_blocks] = ss->specifier; + fenix_assert(ptr == dst.data()+dst.size()); +} + +void DataSubset::deserialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst +) const { + if(regions.empty()) return; + fenix_assert(dst.size()%elm_size==0); + + size_t max_elm = dst.size()/elm_size - 1; + if(max_elm == 0){ + max_elm = end(); + fenix_assert(max_elm != MAX); + dst.resize((max_elm+1)*elm_size); + } + + fenix_assert(src.size() == count(max_elm)*elm_size); + const char* ptr = src.data(); + + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert((ptr+len)-src.data() <= src.size()); + fenix_assert(start+len <= dst.size()); + + memcpy(dst.data()+start, ptr, len); + ptr += len; + } - MPI_Send((void*)toSend, 3*ss->num_blocks + 3, MPI_INT, dest, tag, comm); - free(toSend); + fenix_assert(ptr == src.data()+src.size()); } -void __fenix_data_subset_recv(Fenix_Data_subset* ss, int src, int tag, MPI_Comm comm){ - MPI_Status status; - MPI_Probe(src, tag, comm, &status); +void DataSubset::copy_data( + const size_t elm_size, const size_t src_len, const char* src, DataBuffer& dst +) const { + if(regions.empty()) return; + fenix_assert(src_len != 0 || end() != MAX, + "must specify either a maximum element count or provide a limited-bounds data subset"); - int size; - MPI_Get_count(&status, MPI_INT, &size); + size_t max_elm = src_len ? 
src_len-1 : end(); + if(dst.size() < (max_elm+1)*elm_size) dst.resize((max_elm+1)*elm_size); - int *recvd = (int*)malloc(sizeof(int) * size); - MPI_Recv((void*)recvd, size, MPI_INT, src, tag, comm, NULL); + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; - __fenix_data_subset_init(recvd[0], ss); - for(int i = 0; i < ss->num_blocks; i++){ - ss->start_offsets[i] = recvd[1+3*i]; - ss->end_offsets[i] = recvd[2+3*i]; - ss->num_repeats[i] = recvd[3+3*i]; + fenix_assert(src_len == 0 || (start+len)/elm_size <= src_len); + fenix_assert(start+len <= dst.size()); + + memcpy(dst.data()+start, src+start, len); } - ss->stride = recvd[1+3*ss->num_blocks]; - ss->specifier = recvd[2+3*ss->num_blocks]; +} + +void DataSubset::copy_data( + const size_t elm_size, const DataBuffer& src, const size_t dst_len, char* dst +) const { + if(regions.empty()) return; + fenix_assert(dst_len != 0 || end() != MAX, + "must specify either a maximum element count or provide a limited-bounds data subset"); + + size_t max_elm = std::min(dst_len ? 
dst_len-1 : end(), src.size()); - free(recvd); + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert(dst_len == 0 || (start+len)/elm_size <= dst_len); + fenix_assert(start+len <= src.size()); + + memcpy(dst+start, src.data()+start, len); + } } +bool DataSubset::includes(size_t idx) const { + return !bounded_regions(idx, idx).empty(); +} + +bool DataSubset::includes_all(size_t end) const { + std::set remaining = {DataRegion({0, end})}; + + for(const auto& r : regions){ + if(r.start > end) break; + + auto next = remaining.begin(); + while(next != remaining.end()){ + auto it = next++; + auto rem = *it; + + remaining.erase(it); + remaining.merge(rem - r); + } + } + + return remaining.empty(); +} -int __fenix_data_subset_free( Fenix_Data_subset *subset_specifier ) { - int retval = FENIX_SUCCESS; - if(subset_specifier->specifier == __FENIX_SUBSET_UNDEFINED){ - fprintf(stderr, "Detected double free of subset!\n"); - } - free( subset_specifier->num_repeats ); - free( subset_specifier->start_offsets ); - free( subset_specifier->end_offsets ); - subset_specifier->specifier = __FENIX_SUBSET_UNDEFINED; - return retval; +std::string DataSubset::str() const { + std::string ret = "{"; + for(const auto& r : regions) ret += r.str() + ", "; + if(!empty()){ + ret.pop_back(); + ret.pop_back(); + } + ret += "}"; + return ret; } -/** - * @brief - * @param subset_specifier - */ -int __fenix_data_subset_delete( Fenix_Data_subset *subset_specifier ) { - __fenix_data_subset_free(subset_specifier); - free(subset_specifier); +} // namespace Fenix + +using namespace Fenix; + +int __fenix_data_subset_create( + int num_blocks, int start, int end, int stride, Fenix_Data_subset *subset +) { + subset->impl = new DataSubset({start, end}, num_blocks, stride); return FENIX_SUCCESS; } + +int __fenix_data_subset_createv( + int num_blocks, int *starts, int *ends, Fenix_Data_subset *subset +) { + 
fenix_assert(num_blocks > 0, "num_blocks (%d) must be positive", num_blocks); + + std::vector> bounds; + bounds.reserve(num_blocks); + for(int i = 0; i < num_blocks; i++) bounds.push_back({starts[i], ends[i]}); + + subset->impl = new DataSubset(bounds); + + return FENIX_SUCCESS; +} + +int __fenix_data_subset_free( Fenix_Data_subset *subset ) { + delete (DataSubset*) subset->impl; + return FENIX_SUCCESS; +} diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 51b7a31..1435db6 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -68,45 +68,60 @@ #include using namespace Fenix; - -int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, - int spare_ranks, - int spawn, - MPI_Info info, int *error, jmp_buf *jump_environment) -{ - *role = fenix.role; - *error = 0; - - fenix.user_world = new_comm; - - MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); - - fenix.world = (MPI_Comm *)malloc(sizeof(MPI_Comm)); - MPI_Comm_dup(comm, fenix.world); - PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); - - fenix.spare_ranks = spare_ranks; - fenix.spawn_policy = spawn; - fenix.recover_environment = jump_environment; - fenix.ret_role = role; - fenix.ret_error = error; - - MPI_Op_create((MPI_User_function *) __fenix_ranks_agree, 1, &fenix.agree_op); - - /* Check the values in info */ - if (info != MPI_INFO_NULL) { +using namespace Fenix::Data; + +int __fenix_preinit( + int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, + int spare_ranks, int spawn, MPI_Info info, int *error, jmp_buf *jump_env +) { + Args::FenixInitArgs args; + args.role = role; + args.in_comm = comm; + args.out_comm = new_comm; + args.argc = argc; + args.argv = argv; + args.spares = spare_ranks; + args.spawn = spawn; + args.err = error; + if(info != MPI_INFO_NULL){ char value[MPI_MAX_INFO_VAL + 1]; int vallen = MPI_MAX_INFO_VAL; int found; MPI_Info_get(info, 
"FENIX_RESUME_MODE", vallen, value, &found); - if (found) __fenix_set_resume_mode(value); + if(found) args.resume_mode = get_resume_mode(value); + else args.resume_mode = JUMP; MPI_Info_get(info, "FENIX_UNHANDLED_MODE", vallen, value, &found); - if (found) __fenix_set_unhandled_mode(value); + if(found) args.unhandled_mode = get_unhandled_mode(value); + } else { + args.resume_mode = JUMP; } + return fenix_preinit(args, jump_env); +} + +int fenix_preinit(const Args::FenixInitArgs& args, jmp_buf* jump_env){ + fenix.world = (MPI_Comm *)malloc(sizeof(MPI_Comm)); + MPI_Comm_dup(args.in_comm, fenix.world); + + MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); + PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); + + fenix.user_world = args.out_comm; + fenix.spare_ranks = args.spares; + fenix.spawn_policy = args.spawn; + fenix.recover_environment = jump_env; + fenix.resume_mode = args.resume_mode; + fenix.unhandled_mode = args.unhandled_mode; + fenix.ret_role = args.role ? args.role : &fenix.role; + fenix.ret_error = args.err ? args.err : &fenix.repair_result; - if (fenix.spare_ranks >= __fenix_get_world_size(comm)) { + *fenix.ret_role = fenix.role; + *fenix.ret_error = FENIX_SUCCESS; + + MPI_Op_create((MPI_User_function *) __fenix_ranks_agree, 1, &fenix.agree_op); + + if (fenix.spare_ranks >= __fenix_get_world_size(*fenix.world)) { debug_print("Fenix: <%d> spare ranks requested are unavailable\n", fenix.spare_ranks); } @@ -114,7 +129,7 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.data_recovery = __fenix_data_recovery_init(); /*****************************************************/ - /* Note: fenix.new_world is only valid for the */ + /* Note: fenix.new_world is only valid for the */ /* active MPI ranks. 
Spare ranks do not */ /* allocate any communicator content with this.*/ /* Any MPI calls in spare ranks with new_world */ @@ -129,16 +144,16 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", __fenix_get_current_rank(*fenix.world), fenix.role, - fenix.num_inital_ranks); + fenix.num_inital_ranks); } } else { - fenix.num_inital_ranks = spare_ranks; + fenix.num_inital_ranks = fenix.spare_ranks; if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", __fenix_get_current_rank(*fenix.world), fenix.role, - fenix.num_inital_ranks); + fenix.num_inital_ranks); } } @@ -146,7 +161,6 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha while ( __fenix_spare_rank() == 1) { int a; - int myrank; MPI_Status mpi_status; fenix.ignore_errs = true; int ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, @@ -177,30 +191,26 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha return fenix.role; } -void __fenix_set_resume_mode(const std::string_view& name){ +Fenix_Resume_mode get_resume_mode(const std::string_view& name){ if (name == "JUMP") { - fenix.resume_mode = Fenix_Resume_mode::JUMP; + return Fenix_Resume_mode::JUMP; } else if (name == "RETURN") { - fenix.resume_mode = Fenix_Resume_mode::RETURN; + return Fenix_Resume_mode::RETURN; } else if (name == "THROW") { - fenix.resume_mode = Fenix_Resume_mode::THROW; - } else { - fprintf(stderr, "Unsupported FENIX_RESUME_MODE %s\n", name.data()); - MPI_Abort(*fenix.world, 1); + return Fenix_Resume_mode::THROW; } + fatal_print("Unsupported FENIX_RESUME_MODE %s", name.data()); } -void __fenix_set_unhandled_mode(const std::string_view& name){ +Fenix_Unhandled_mode get_unhandled_mode(const std::string_view& name){ if (name == "SILENT") { - fenix.resume_mode = Fenix_Unhandled_mode::SILENT; + 
return Fenix_Unhandled_mode::SILENT; } else if (name == "PRINT") { - fenix.resume_mode = Fenix_Unhandled_mode::PRINT; + return Fenix_Unhandled_mode::PRINT; } else if (name == "ABORT") { - fenix.resume_mode = Fenix_Unhandled_mode::ABORT; - } else { - fprintf(stderr, "Unsupported FENIX_UNHANDLED_MODE %s\n", name.data()); - MPI_Abort(*fenix.world, 1); + return Fenix_Unhandled_mode::ABORT; } + fatal_print("Unsupported FENIX_UNHANDLED_MODE %s", name.data()); } int __fenix_spare_rank_within(MPI_Comm refcomm) @@ -839,7 +849,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) case RETURN: break; case THROW: - throw CommException(*fenix.user_world, *fenix.ret_error); + Fenix::throw_exception(); break; default: printf("Fenix internal error: Unknown resume mode %d\n", fenix.resume_mode); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ba6f65c..1d59516 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,8 +1,8 @@ -add_subdirectory(subset_internal) -add_subdirectory(subset_merging) +add_subdirectory(subset) add_subdirectory(request_tracking) add_subdirectory(request_cancelled) add_subdirectory(no_jump) add_subdirectory(issend) add_subdirectory(failed_spares) add_subdirectory(exception_throw) +add_subdirectory(storev) diff --git a/test/subset_merging/CMakeLists.txt b/test/storev/CMakeLists.txt similarity index 63% rename from test/subset_merging/CMakeLists.txt rename to test/storev/CMakeLists.txt index 603686e..d87a3c9 100644 --- a/test/subset_merging/CMakeLists.txt +++ b/test/storev/CMakeLists.txt @@ -8,7 +8,8 @@ # directory. 
# -add_executable(fenix_subset_merging_test fenix_subset_merging_test.c) -target_link_libraries(fenix_subset_merging_test fenix) +add_executable(storev storev.cpp) +target_link_libraries(storev fenix MPI::MPI_CXX) -add_test(subset_merging fenix_subset_merging_test) +add_test(NAME storev + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} storev ${MPIEXEC_POSTFLAGS}) diff --git a/test/storev/storev.cpp b/test/storev/storev.cpp new file mode 100644 index 0000000..991741a --- /dev/null +++ b/test/storev/storev.cpp @@ -0,0 +1,201 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr int kKillID = 2; +constexpr int my_group = 0; +constexpr int my_member = 0; +constexpr int start_timestamp = 0; +constexpr int group_depth = 1; +int errflag; + +using Fenix::DataSubset; +using namespace Fenix::Data; + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + + MPI_Comm res_comm; + Fenix::init({.out_comm = &res_comm, .spares = 1}); + + int num_ranks, rank; + MPI_Comm_size(res_comm, &num_ranks); + MPI_Comm_rank(res_comm, &rank); + + std::vector data; + + bool should_throw = Fenix_get_role() == FENIX_ROLE_RECOVERED_RANK; + while(true) try { + if(should_throw){ + should_throw = false; + Fenix::throw_exception(); + } + + //Initial work and commits + if(Fenix_get_role() == FENIX_ROLE_INITIAL_RANK){ + 
Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + Fenix_Data_member_create( + my_group, my_member, data.data(), FENIX_RESIZEABLE, MPI_INT + ); + + data.resize(100 + rank); + for(int& i : data) i = -1; + + + //Store the whole array first. We need to keep our buffer pointer updated + //since resizing an array can change it + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_storev(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + //Now commit a smaller portion with different data. + data.resize(50 + rank); + int val = 1; + for(int& i : data) i = val++; + + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_storev(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + if(rank == kKillID){ + fprintf(stderr, "Doing kill on node %d\n", rank); + raise(SIGTERM); + } + } + + Fenix_Finalize(); + break; + } catch (const Fenix::CommException& e) { + const Fenix::CommException* err = &e; + while(true) try { + //We've had a failure! Time to recover data. + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + if(err->fenix_err != FENIX_SUCCESS){ + fprintf(stderr, "FAILURE on Fenix Init (%d). 
Exiting.\n", err->fenix_err); + exit(1); + } + + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + + //Do a null restore to get information about the stored subset + DataSubset stored_subset; + int ret = member_restore( + my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + ); + if(ret != FENIX_SUCCESS) { + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + //Resize data to fit all stored data + data.resize(stored_subset.end()+1); + + //Set all data to a value that was never stored, just for testing + for(int& i : data) i = -2; + + //Now do an lrestore to get the recovered data. + ret = member_lrestore( + my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + stored_subset + ); + + break; + } catch (const Fenix::CommException& nested){ + err = &nested; + } + } + + //Ensure data is correct after execution and recovery + bool successful = data.size() == 50 + rank; + if(!successful) printf("Rank %d expected data size 50, but got %d\n", rank, data.size()); + + for(int i = 0; i < data.size() && successful; i++){ + successful &= data[i] == i+1; + if(!successful) printf("Rank %d data[%d]=%d, but should be %d!\n", rank, i, data[i], i+1); + } + + if(successful){ + printf("Rank %d successfully recovered\n", rank); + } else { + printf("FAILURE on rank %d\n", rank); + } + + MPI_Finalize(); + return !successful; //return error status +} diff --git a/test/subset/CMakeLists.txt b/test/subset/CMakeLists.txt new file mode 100644 index 0000000..5029cfd --- /dev/null +++ b/test/subset/CMakeLists.txt @@ -0,0 +1,24 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. 
+# For more information, see the LICENSE file in the top Fenix +# directory. +# +add_executable(fenix_subset_includes subset_includes.cpp) +target_link_libraries(fenix_subset_includes fenix) +add_test(subset_includes fenix_subset_includes) + +add_executable(fenix_subset_subtraction subset_subtraction.cpp) +target_link_libraries(fenix_subset_subtraction fenix) +add_test(subset_subtraction fenix_subset_subtraction) + +add_executable(fenix_subset_copy subset_copy.cpp) +target_link_libraries(fenix_subset_copy fenix) +add_test(subset_copy fenix_subset_copy) + +add_executable(fenix_subset_addition subset_addition.cpp) +target_link_libraries(fenix_subset_addition fenix) +add_test(subset_addition fenix_subset_addition) diff --git a/test/subset_internal/fenix_subset_internal_test.c b/test/subset/subset_addition.cpp similarity index 51% rename from test/subset_internal/fenix_subset_internal_test.c rename to test/subset/subset_addition.cpp index 3e797ed..2491631 100644 --- a/test/subset_internal/fenix_subset_internal_test.c +++ b/test/subset/subset_addition.cpp @@ -63,95 +63,76 @@ #include #include -int _verify_subset( double *data, int num_blocks, int start_offset, int end_offset, int stride, - Fenix_Data_subset *subset_specifier ); +#include -int _verify_subset( double *data, int num_repeats, int start_offset, int end_offset, int stride, - Fenix_Data_subset *sp) -{ - int i, j; - int idx; - int block_size; - int flag = 0; - double accumulator =0.0; +#include "subset_common.hpp" - if( num_repeats != sp->num_repeats[0]+1 ) { - flag = 1; - printf("num_repeats set incorrectly."); - } - if( start_offset != sp->start_offsets[0] ) { - flag = 2; - printf("start_offset set incorrectly\n"); - } - if( end_offset != sp->end_offsets[0]){ - flag = 3; - printf("end_offset set incorrectly\n"); - } - if( sp->specifier != __FENIX_SUBSET_CREATE ) { - flag = 4; - printf("specifier set incorrectly\n"); - } - if(stride != sp->stride){ - flag = 5; - printf("stride set incorrectly\n"); +using 
namespace Fenix; + +bool test_addition(const DataSubset& a, const DataSubset& b){ + printf("Testing subsets a=%s, b=%s\n", a.str().c_str(), b.str().c_str()); + + const DataSubset c = a + b; + const DataSubset d = b + a; + + printf("c=a+b=%s\n", c.str().c_str()); + printf("d=b+a=%s\n", d.str().c_str()); + + if(c != d){ + printf("a+b != b+a\n"); + return false; } - /* Iterate over the loop to see if any memory error occurs*/ - idx = start_offset; - block_size = end_offset - start_offset; - for ( i = 0; i < num_repeats; i++ ) { - for( j = 0; j < block_size; j++ ) { - accumulator += data[idx+j]; + size_t start = std::min(a.start(), b.start()); + size_t end; + if(a.end() == -1 || b.end() == -1){ + end = start+1000; + } else { + end = std::max(a.end(), b.end()) + 10; + } + + for(int i = start; i <= end; i++){ + if(c.includes(i) != (a.includes(i) || b.includes(i))){ + if(c.includes(i)){ + printf("c=a+b incorrectly includes index %d not in a or b\n", i); + return false; + } else { + printf( + "c=a+b incorrectly excludes index %d in %s\n", i, + a.includes(i) ? b.includes(i) ? "both" : "a" : "b" + ); + return false; + } + } + if(d.includes(i) != (a.includes(i) || b.includes(i))){ + if(d.includes(i)){ + printf("d=b+a incorrectly includes index %d not in a or b\n", i); + return false; + } else { + printf( + "d=b+a incorrectly excludes index %d in %s\n", i, + a.includes(i) ? b.includes(i) ? 
"both" : "a" : "b" + ); + return false; + } } - idx += stride; } - return flag; + return true; } - int main(int argc, char **argv) { - Fenix_Data_subset subset_specifier; - int num_blocks; - int start_offset, end_offset, stride; - int space_size; - double *d_space; - - if (argc < 6) { - printf("Usage: %s <# blocks> \n", *argv); - exit(0); - } - - space_size = atoi(argv[1]); - num_blocks = atoi(argv[2]); - start_offset = atoi(argv[3]); - end_offset = atoi(argv[4]); - stride = atoi(argv[5]); + bool success = true; - if( space_size < (num_blocks * stride + start_offset) ) { - printf("Error: Array size is smaller than (the number of blocks x stride) + start_offset\n"); - printf("Aborting\n"); - exit(0); + auto subsets = get_subsets(); + for(const auto& a : subsets){ + for(const auto& b : subsets){ + success &= test_addition(a, b); + } } - if( start_offset > end_offset ) { - printf("Error: Start offset must be less than end_offset\n"); - printf("Aborting\n"); - exit(0); - } - - d_space = (double *)malloc(sizeof(double)*space_size); - Fenix_Data_subset_create(num_blocks, start_offset, end_offset, stride, &subset_specifier); - //data_subset_create( num_blocks, start_offset, end_offset, stride, &subset_specifier ); - // Verification - int err_code = _verify_subset( d_space, num_blocks, start_offset, end_offset, stride, &subset_specifier ); - // free_data_subset_fixed ( &subset_specifier); - free(d_space); - if( err_code == 0 ) { - printf("Passed\n"); - } - return err_code; + return success ? 
0 : 1; } diff --git a/test/subset/subset_common.hpp b/test/subset/subset_common.hpp new file mode 100644 index 0000000..2076fc3 --- /dev/null +++ b/test/subset/subset_common.hpp @@ -0,0 +1,79 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +#include + +//Returns a variety of subsets to perform tests on +static std::vector get_subsets(){ + using namespace Fenix; + std::vector ret; + ret.push_back(DataSubset()); + ret.push_back(DataSubset(10)); + ret.push_back(DataSubset(-1)); + ret.push_back(DataSubset({0, 10})); + ret.push_back(DataSubset({0, -1})); + ret.push_back(DataSubset({5, 10})); + ret.push_back(DataSubset({5, -1})); + ret.push_back(DataSubset({0, 4}, 2, 5)); + ret.push_back(DataSubset({0, 4}, 2, 6)); + ret.push_back(DataSubset({0, 4}, 10, 6)); + ret.push_back(DataSubset({0, 4}, 10, 10)); + return ret; +} diff --git a/test/subset/subset_copy.cpp b/test/subset/subset_copy.cpp new file mode 100644 index 0000000..2b7ae38 --- /dev/null +++ b/test/subset/subset_copy.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia 
Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "subset_common.hpp" + +using namespace Fenix; + +bool test_copy(const DataSubset& a){ + size_t count = a.max_count(); + if(count == 0) count = 1000; + + std::vector in, out; + in.resize(count); + out.resize(count); + + DataBuffer buf; + + for(int& i : in) i = 1; + for(int& i : out) i = 0; + + a.copy_data(sizeof(int), count, (char*)in.data(), buf); + a.copy_data(sizeof(int), buf, count, (char*)out.data()); + + for(int i = 0; i < count; i++){ + if(a.includes(i) && out[i] != 1){ + printf("Failed to transfer index %d\n", i); + return false; + } else if(!a.includes(i) && out[i] != 0){ + printf("Incorrectly transfered index %d\n", i); + return false; + } + } + + return true; +} + +int main(int argc, char **argv) +{ + bool success = true; + + auto subsets = get_subsets(); + for(const auto& a : subsets){ + success &= test_copy(a); + } + + return success ? 0 : 1; +} + + diff --git a/test/subset/subset_includes.cpp b/test/subset/subset_includes.cpp new file mode 100644 index 0000000..332967d --- /dev/null +++ b/test/subset/subset_includes.cpp @@ -0,0 +1,166 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace Fenix; + +bool test_unstrided(int start, int end){ + printf("Testing subset [%d, %d] \n", start, end); + std::cout.flush(); + + DataSubset s({start, end}); + for(int i = 0; i < start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d] incorrectly includes %d\n", start, end, i); + return false; + } + } + + int test_end = end == -1 ? start+20 : end; + for(int i = start; i <= test_end; i++){ + if(!s.includes(i)){ + printf("Subset [%d, %d] incorrectly excludes %d\n", start, end, i); + return false; + } + } + + if(end != -1){ + for(int i = end+1; i <= test_end+20; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d] incorrectly includes %d\n", start, end, i); + return false; + } + } + } + + return true; +} + +bool test_strided(int start, int end, int count, int stride){ + printf("Testing subset [%d, %d]x%d stride %d \n", start, end, count, stride); + std::cout.flush(); + + DataSubset s({start, end}, count, stride); + + for(int i = 0; i < start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return false; + } + } + for(int b = 0; b < count && b < 5; b++){ + int b_start = start+b*stride; + int b_end = end+b*stride; + for(int i = b_start; i <= b_end; i++){ + if(!s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly excludes %d\n", start, end, count, stride, i); + return false; + } + } + } + for(int b = 0; b < count-1 && b < 5; b++) { + int b_end = end+b*stride; + int next_b_start = start + (b+1)*stride; + for(int i = b_end+1; i < next_b_start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return 
false; + } + } + } + + int range_end = end+(count-1)*stride; + for(int i = range_end+1; i < range_end+1+stride*2; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return false; + } + } + + return true; +} + +int main(int argc, char **argv) +{ + bool success = true; + + success &= test_unstrided(0, 10); + success &= test_unstrided(5, 10); + success &= test_unstrided(10, 10); + success &= test_unstrided(0, -1); + success &= test_unstrided(10, -1); + + success &= test_strided(0, 4, 2, 5); + success &= test_strided(0, 4, 2, 6); + success &= test_strided(0, 4, 10, 6); + success &= test_strided(0, 4, 10, 10); + + return success ? 0 : 1; +} + + diff --git a/test/subset/subset_subtraction.cpp b/test/subset/subset_subtraction.cpp new file mode 100644 index 0000000..c6c6cb9 --- /dev/null +++ b/test/subset/subset_subtraction.cpp @@ -0,0 +1,119 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "subset_common.hpp" + +using namespace Fenix; + +bool test_subtraction(const DataSubset& a, const DataSubset& b){ + const DataSubset c = a - b; + + size_t start = std::min(a.start(), b.start()); + size_t end; + if(a.end() == -1 || b.end() == -1){ + end = start+1000; + } else { + end = std::max(a.end(), b.end()) + 10; + } + + for(int i = start; i <= end; i++){ + if(c.includes(i) && !a.includes(i)){ + if(!a.includes(i)){ + printf("Result of a - b incorrectly includes index not in a\n"); + return false; + } + if(b.includes(i)){ + printf("Result of a - b incorrectly includes index in b\n"); + return false; + } + } + if(!c.includes(i)){ + if(a.includes(i) && !b.includes(i)){ + printf("Result of a - b incorrectly excludes index in a but not b\n"); + return false; + } + } + } + + return true; +} + +int main(int argc, char **argv) +{ + bool success = true; + + auto subsets = get_subsets(); + for(const auto& a : subsets){ + for(const auto& b : subsets){ + success &= test_subtraction(a, b); + } + } + + return success ? 0 : 1; +} + + diff --git a/test/subset_internal/CMakeLists.txt b/test/subset_internal/CMakeLists.txt deleted file mode 100644 index 4dcfc28..0000000 --- a/test/subset_internal/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -# -# This file is part of Fenix -# Copyright (c) 2016 Rutgers University and Sandia Corporation. -# This software is distributed under the BSD License. -# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -# the U.S. Government retains certain rights in this software. -# For more information, see the LICENSE file in the top Fenix -# directory. 
-# -add_executable(fenix_subset_internal_test fenix_subset_internal_test.c) -target_link_libraries(fenix_subset_internal_test fenix) - -add_test(subset_internal fenix_subset_internal_test "100" "3" "5" "7" "10") diff --git a/test/subset_merging/fenix_subset_merging_test.c b/test/subset_merging/fenix_subset_merging_test.c deleted file mode 100644 index 8ed6cd6..0000000 --- a/test/subset_merging/fenix_subset_merging_test.c +++ /dev/null @@ -1,176 +0,0 @@ -#include -#include -#include -void print_subset(Fenix_Data_subset *ss){ - printf("\tnum_blocks:\t %d\n", ss->num_blocks); - printf("\tstride:\t\t %d\n", ss->stride); - printf("\tspecifier:\t %d\n", ss->specifier); - printf("\tstart_offsets:\t ["); - for(int i = 0; i < ss->num_blocks; i++){ - printf( (i==0) ? "%d" : ", %d", ss->start_offsets[i]); - } - printf("]\n"); - printf("\tend_offsets:\t ["); - for(int i = 0; i < ss->num_blocks; i++){ - printf( (i==0) ? "%d" : ", %d", ss->end_offsets[i]); - } - printf("]\n"); - printf("\tnum_repeats:\t ["); - for(int i = 0; i < ss->num_blocks; i++){ - printf( (i==0) ? "%d" : ", %d", ss->num_repeats[i]); - } - printf("]\n"); -} - -int test_subset_main(Fenix_Data_subset *ss, int num_blocks, - int *start_offsets, int *end_offsets, int *num_repeats){ - if(ss->num_blocks != num_blocks){ - //Wrong, and we don't want to keep checking or we might segfault - return 0; - } - - //Current implementation maintains ordering, so this assumes the - //tester knows the expected output order. 
- int success = 1; - for(int i = 0; (i < num_blocks) && success; i++){ - success = success && ss->start_offsets[i] == start_offsets[i]; - success = success && ss->end_offsets[i] == end_offsets[i]; - success = success && ss->start_offsets[i] == start_offsets[i]; - } - - return success; -} - -int test_subset_create( Fenix_Data_subset *sub1, Fenix_Data_subset *sub2, - Fenix_Data_subset *sub3, int num_blocks, int stride, - int *start_offsets, int *end_offsets, int *num_repeats){ - int success = 1; - success = success && test_subset_main(sub3, num_blocks, start_offsets, end_offsets, num_repeats); - success = success && sub3->specifier == __FENIX_SUBSET_CREATE; - success = success && sub3->stride == stride; - - if(!success){ - printf("ERROR!\n"); - printf("sub1: \n"); - print_subset(sub1); - printf("sub2: \n"); - print_subset(sub2); - printf("sub3: \n"); - print_subset(sub3); - } else { - printf("Success\n"); - } - - __fenix_data_subset_free(sub1); - __fenix_data_subset_free(sub2); - __fenix_data_subset_free(sub3); - - return !success; //return failure status -} - -int test_subset_createv( Fenix_Data_subset *sub1, Fenix_Data_subset *sub2, - Fenix_Data_subset *sub3, int num_blocks, - int *start_offsets, int *end_offsets){ - int success = 1; - int* zeros = calloc(num_blocks, sizeof(int)); - success = success && test_subset_main(sub3, num_blocks, start_offsets, end_offsets, zeros); - free(zeros); - success = success && sub3->specifier == __FENIX_SUBSET_CREATEV; - - if(!success){ - printf("ERROR!\n"); - printf("sub1: \n"); - print_subset(sub1); - printf("sub2: \n"); - print_subset(sub2); - printf("sub3: \n"); - print_subset(sub3); - } else { - printf("Success\n"); - } - - __fenix_data_subset_free(sub1); - __fenix_data_subset_free(sub2); - __fenix_data_subset_free(sub3); - - return !success; -} - -int main(int argc, char **argv) { - Fenix_Data_subset sub1; - Fenix_Data_subset sub2; - Fenix_Data_subset sub3; - - int failure = 0; - - printf("Testing equivalent create subsets of 
same size & location: "); - Fenix_Data_subset_create(3, 2, 5, 5, &sub1); - Fenix_Data_subset_create(3, 2, 5, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){2}, (int[]){5}, (int[]){2}); - - printf("Testing equivalent create subsets, one within another: "); - Fenix_Data_subset_create(1, 17, 20, 5, &sub1); - Fenix_Data_subset_create(3, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){2}); - - printf("Testing equivalent create subsets in non-overlapping, continuous regions: "); - Fenix_Data_subset_create(1, 22, 25, 5, &sub1); - Fenix_Data_subset_create(2, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){2}); - - printf("Testing equivalent create subsets in non-overlapping, non-continuous regions: "); - Fenix_Data_subset_create(1, 22, 25, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 2, 5, (int[]){22, 12}, (int[]){25, 15}, (int[]){1,0}); - - printf("Testing create subsets of same location: "); - Fenix_Data_subset_create(1, 13, 15, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){0}); - - printf("Testing distinct create subsets with same stride: "); - Fenix_Data_subset_create(1, 17, 19, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 2, 5, (int[]){17, 12}, (int[]){19, 15}, (int[]){0, 0}); - - printf("Testing distinct, overlapping create subsets with same stride: "); - 
Fenix_Data_subset_create(1, 17, 19, 5, &sub1); - Fenix_Data_subset_create(2, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){1}); - - printf("Testing distinct create subsets with unique stride: "); - Fenix_Data_subset_create(1, 17, 19, 6, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 2, (int[]){17, 12}, (int[]){19, 15}); - - printf("Testing distinct overlapping create subsets with unique stride: "); - Fenix_Data_subset_create(1, 13, 16, 6, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 1, (int[]){12}, (int[]){16}); - - printf("Testing complex createv subsets: "); - Fenix_Data_subset_createv(4, (int[]){1, 4, 21, 23}, (int[]){2, 17, 25, 26}, &sub1); - Fenix_Data_subset_createv(3, (int[]){0, 18, 30}, (int[]){1, 19, 30}, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 4, (int[]){0, 4, 21, 30}, (int[]){2, 19, 26, 30}); - - printf("Testing complex create and createv together: "); - Fenix_Data_subset_create(4, 11, 13, 10, &sub1); - Fenix_Data_subset_createv(3, (int[]){0, 12, 31}, (int[]){1, 20, 31}, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 4, (int[]){11, 31, 41, 0}, (int[]){23, 33, 43, 1}); - - - Fenix_Data_subset_delete(&sub1); - - return failure; -} From f2a5274b5b337fb53335d542af2c9f73867daa74 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 26 Jun 2025 15:20:11 -0500 Subject: [PATCH 08/21] Fix assertion for 1-rank test case --- src/fenix_data_policy_in_memory_raid.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fenix_data_policy_in_memory_raid.cpp 
b/src/fenix_data_policy_in_memory_raid.cpp index c813881..ad2444c 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -630,7 +630,7 @@ Group::Group( //my_rank should be one of the inserted ranks, or something in the //logic here is broken. - assert(partner_set.size() == 3); + assert(partner_set.size() == 3 || (comm_size==1 && partner_set.size()==1)); } } else if(mode == 5){ set_size = policy_vals[2]; From 18c7fad21d718e1fe9b523db2c9516c2fc0e3626 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 14:02:07 -0500 Subject: [PATCH 09/21] Use correct API in example --- examples/07_resizeable_member/resizeable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/07_resizeable_member/resizeable.cpp b/examples/07_resizeable_member/resizeable.cpp index 2ebe789..98b3697 100644 --- a/examples/07_resizeable_member/resizeable.cpp +++ b/examples/07_resizeable_member/resizeable.cpp @@ -166,7 +166,7 @@ int main(int argc, char **argv) { } //Resize data to fit all stored data - data.resize(stored_subset.end()+1); + data.resize(stored_subset.max_count()); //Set all data to a value that was never stored, just for testing for(int& i : data) i = -2; From 8d502b56260a175771afdfbeaddf3593f30e05ff Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 14:28:37 -0500 Subject: [PATCH 10/21] Rename some options for better encapsulation, add minor conveniences --- include/fenix.h | 15 ++++++---- include/fenix.hpp | 44 ++++++++++++++++++++---------- include/fenix_ext.hpp | 11 ++++++-- src/fenix.cpp | 6 ++-- src/{globals.cpp => fenix_ext.cpp} | 10 +++---- src/fenix_process_recovery.cpp | 12 ++++---- 6 files changed, 61 insertions(+), 37 deletions(-) rename src/{globals.cpp => fenix_ext.cpp} (93%) diff --git a/include/fenix.h b/include/fenix.h index 8c042fc..416bfb0 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -145,11 +145,11 @@ typedef enum { */ typedef enum { //!Return to 
Fenix_Init via longjmp (default) - JUMP, + FENIX_RESUME_JUMP, //!Return the error code inline - RETURN, + FENIX_RESUME_RETURN, //!Throw a Fenix::CommException - THROW + FENIX_RESUME_THROW } Fenix_Resume_mode; /** @@ -157,11 +157,11 @@ typedef enum { */ typedef enum { //!Ignore unhandled errors - SILENT, + FENIX_UNHANDLED_SILENT, //!Print error and continue without handling - PRINT, + FENIX_UNHANDLED_PRINT, //!Print error and abort Fenix's world (default) - ABORT + FENIX_UNHANDLED_ABORT } Fenix_Unhandled_mode; /** @@ -363,6 +363,8 @@ int Fenix_Finalize(); #define FENIX_DATA_POLICY_IN_MEMORY_RAID 13 #define FENIX_DATA_POLICY_IMR FENIX_DATA_POLICY_IN_MEMORY_RAID +#define FENIX_TIME_STAMP_IGNORE NULL + /** * @unimplemented As MPI_Request, but for Fenix asynchronous data recovery calls */ @@ -393,6 +395,7 @@ extern const Fenix_Data_subset FENIX_DATA_SUBSET_FULL; //!@brief A standin for checkpointing/recovering none of the available data in a member. extern const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY; +extern Fenix_Data_subset* FENIX_DATA_SUBSET_IGNORE; /** * @brief Create a Data Group diff --git a/include/fenix.hpp b/include/fenix.hpp index af6d1b2..6b1f6f7 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -75,18 +75,33 @@ int Fenix_Callback_register(std::function callback); namespace Fenix { +using Role = Fenix_Rank_role; +constexpr Role INITIAL_RANK = FENIX_ROLE_INITIAL_RANK; +constexpr Role RECOVERED_RANK = FENIX_ROLE_RECOVERED_RANK; +constexpr Role SURVIVOR_RANK = FENIX_ROLE_SURVIVOR_RANK; + +using ResumeMode = Fenix_Resume_mode; +constexpr ResumeMode JUMP = FENIX_RESUME_JUMP; +constexpr ResumeMode RETURN = FENIX_RESUME_RETURN; +constexpr ResumeMode THROW = FENIX_RESUME_THROW; + +using UnhandledMode = Fenix_Unhandled_mode; +constexpr UnhandledMode SILENT = FENIX_UNHANDLED_SILENT; +constexpr UnhandledMode PRINT = FENIX_UNHANDLED_PRINT; +constexpr UnhandledMode ABORT = FENIX_UNHANDLED_ABORT; + namespace Args { struct FenixInitArgs { - int* role = nullptr; 
- MPI_Comm in_comm = MPI_COMM_WORLD; - MPI_Comm* out_comm = nullptr; - int* argc = nullptr; - char*** argv = nullptr; - int spares = 0; - int spawn = 0; - Fenix_Resume_mode resume_mode = THROW; - Fenix_Unhandled_mode unhandled_mode = ABORT; - int* err = nullptr; + int* role = nullptr; + MPI_Comm in_comm = MPI_COMM_WORLD; + MPI_Comm* out_comm = nullptr; + int* argc = nullptr; + char*** argv = nullptr; + int spares = 0; + int spawn = 0; + ResumeMode resume_mode = THROW; + UnhandledMode unhandled_mode = ABORT; + int* err = nullptr; }; } @@ -99,8 +114,9 @@ void throw_exception(); namespace Fenix::Data { -extern const DataSubset FENIX_SUBSET_FULL; -extern const DataSubset FENIX_SUBSET_EMPTY; +extern const DataSubset SUBSET_FULL; +extern const DataSubset SUBSET_EMPTY; +extern DataSubset SUBSET_IGNORE; //!@brief Overload of #Fenix_Data_member_store int member_store(int group_id, int member_id, const DataSubset& subset); @@ -122,13 +138,13 @@ int member_istorev( //!@brief Overload of #Fenix_Data_member_restore int member_restore( - int group_id, int member_id, void *target_buffer, int max_count, + int group_id, int member_id, void *target_buffer, int max_length, int time_stamp, DataSubset& data_found ); //!@brief Overload of #Fenix_Data_member_lrestore int member_lrestore( - int group_id, int member_id, void *target_buffer, int max_count, + int group_id, int member_id, void *target_buffer, int max_length, int time_stamp, DataSubset& data_found ); diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 0fce316..c95922e 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -60,18 +60,21 @@ #include #include #include "fenix.h" +#include "fenix.hpp" #include "fenix_opt.hpp" #include "fenix_process_recovery.hpp" #include "fenix_data_group.hpp" +namespace Fenix { + typedef struct { int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init int num_survivor_ranks = 0; // Keeps the global information on the number of survived MPI ranks after failure 
int num_recovered_ranks = 0; // Keeps the number of spare ranks brought into MPI communicator recovery int spare_ranks; // Spare ranks entered by user to repair failed ranks - int resume_mode = Fenix_Resume_mode::JUMP; - int unhandled_mode = Fenix_Unhandled_mode::ABORT; + ResumeMode resume_mode = JUMP; + UnhandledMode unhandled_mode = ABORT; int ignore_errs = false; // Temporarily ignore all errors & recovery int spawn_policy; // Indicate dynamic process spawning jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure @@ -107,5 +110,7 @@ typedef struct { Fenix::Data::fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure } fenix_t; -extern fenix_t fenix; +} + +extern Fenix::fenix_t fenix; #endif // __FENIX_EXT_H__ diff --git a/src/fenix.cpp b/src/fenix.cpp index d363b7e..7bc03be 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -65,12 +65,14 @@ using namespace Fenix; using namespace Fenix::Data; namespace Fenix::Data { -const DataSubset FENIX_SUBSET_FULL = {{0, Fenix::DataSubset::MAX}}; -const DataSubset FENIX_SUBSET_EMPTY = {}; +const DataSubset SUBSET_FULL = {{0, Fenix::DataSubset::MAX}}; +const DataSubset SUBSET_EMPTY = {}; +DataSubset SUBSET_IGNORE = SUBSET_EMPTY; } const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = { new DataSubset(DataSubset::MAX) }; const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = { new DataSubset() }; +Fenix_Data_subset* FENIX_DATA_SUBSET_IGNORE = NULL; int Fenix_Callback_register(std::function callback){ return __fenix_callback_register(callback); diff --git a/src/globals.cpp b/src/fenix_ext.cpp similarity index 93% rename from src/globals.cpp rename to src/fenix_ext.cpp index 8285983..a11507c 100644 --- a/src/globals.cpp +++ b/src/fenix_ext.cpp @@ -12,7 +12,7 @@ // // // -// Copyright (C) 2018 Rutgers University and Sandia Corporation +// Copyright (C) 2016 Rutgers University and Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 
// the U.S. Government retains certain rights in this software. @@ -44,8 +44,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// Michael Heroux, and Matthew Whitlock +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -56,6 +56,4 @@ #include "fenix_ext.hpp" -fenix_t fenix = { - .fenix_init_flag = 0 -}; +Fenix::fenix_t fenix; diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 1435db6..69def07 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -193,22 +193,22 @@ int fenix_preinit(const Args::FenixInitArgs& args, jmp_buf* jump_env){ Fenix_Resume_mode get_resume_mode(const std::string_view& name){ if (name == "JUMP") { - return Fenix_Resume_mode::JUMP; + return Fenix::JUMP; } else if (name == "RETURN") { - return Fenix_Resume_mode::RETURN; + return Fenix::RETURN; } else if (name == "THROW") { - return Fenix_Resume_mode::THROW; + return Fenix::THROW; } fatal_print("Unsupported FENIX_RESUME_MODE %s", name.data()); } Fenix_Unhandled_mode get_unhandled_mode(const std::string_view& name){ if (name == "SILENT") { - return Fenix_Unhandled_mode::SILENT; + return Fenix::SILENT; } else if (name == "PRINT") { - return Fenix_Unhandled_mode::PRINT; + return Fenix::PRINT; } else if (name == "ABORT") { - return Fenix_Unhandled_mode::ABORT; + return Fenix::ABORT; } fatal_print("Unsupported FENIX_UNHANDLED_MODE %s", name.data()); } From b6b84f4547fb11decb8ac2290ea026aeb9c47292 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 14:49:11 -0500 Subject: [PATCH 11/21] Expand c++ API parity, dedup --- include/fenix.hpp | 44 ++++++++++++++ 
include/fenix_data_group.hpp | 4 -- include/fenix_data_recovery.hpp | 17 ------ src/CMakeLists.txt | 2 +- src/fenix.cpp | 73 +++++++++--------------- src/fenix_data_group.cpp | 4 +- src/fenix_data_policy_in_memory_raid.cpp | 6 +- src/fenix_data_recovery.cpp | 44 ++++++++++---- 8 files changed, 110 insertions(+), 84 deletions(-) diff --git a/include/fenix.hpp b/include/fenix.hpp index 6b1f6f7..5f26533 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -110,6 +110,24 @@ void init(const Args::FenixInitArgs args); //!@brief Throw an exception for the most recent fault. Helpful for spares. void throw_exception(); +//!@brief Overload of #Fenix_get_role +Fenix_Rank_role role(); + +//!@brief Overload of #Fenix_get_error +int error(); + +//!@brief Overload of #Fenix_Callback_register +int callback_register(std::function callback); + +//@!brief Overload of #Fenix_Callback_pop +int callback_pop(); + +//!@brief Overload of #Fenix_Process_detect_failures +int detect_failures(bool recover = true); + +//!@brief Overload of #Fenix_Initialized that directly returns true if initialized +bool initialized(); + } // namespace Fenix namespace Fenix::Data { @@ -118,6 +136,17 @@ extern const DataSubset SUBSET_FULL; extern const DataSubset SUBSET_EMPTY; extern DataSubset SUBSET_IGNORE; +//@!brief Overload of Fenix_Data_group_create +int group_create( + int group_id, MPI_Comm comm, int start_time_stamp, int depth, + int policy_name, void* policy_value, int* flag +); + +//@!brief Overload of Fenix_Data_member_create +int member_create( + int group_id, int member_id, void* buffer, int count, MPI_Datatype datatype +); + //!@brief Overload of #Fenix_Data_member_store int member_store(int group_id, int member_id, const DataSubset& subset); @@ -148,6 +177,21 @@ int member_lrestore( int time_stamp, DataSubset& data_found ); +//@!brief overload of #Fenix_Data_commit +int commit(int group_id, int* time_stamp = nullptr); + +//@!brief overload of #Fenix_Data_commit +int commit_barrier(int 
group_id, int* time_stamp = nullptr); + +//@!brief Overload of #Fenix_Data_snapshot_delete +int snapshot_delete(int group_id, int timestamp); + +//@!brief overload of Fenix_Data_group_delete +int group_delete(int group_id); + +//@!brief overload of Fenix_Data_member_delete +int member_delete(int group_id, int member_id); + } // namespace Fenix::Data #endif diff --git a/include/fenix_data_group.hpp b/include/fenix_data_group.hpp index 0c2f258..90f7691 100644 --- a/include/fenix_data_group.hpp +++ b/include/fenix_data_group.hpp @@ -125,10 +125,6 @@ typedef struct __group_entry_packet { fenix_data_recovery_t * __fenix_data_recovery_init(); -int __fenix_group_delete(int groupid); - -int __fenix_member_delete(int groupid, int memberid); - void __fenix_data_recovery_destroy( fenix_data_recovery_t *fx_data_recovery ); void __fenix_data_recovery_reinit( fenix_data_recovery_t *dr, fenix_two_container_packet_t packet); diff --git a/include/fenix_data_recovery.hpp b/include/fenix_data_recovery.hpp index 8610aad..b27efac 100644 --- a/include/fenix_data_recovery.hpp +++ b/include/fenix_data_recovery.hpp @@ -98,20 +98,10 @@ typedef struct __data_entry_packet { } fenix_data_entry_packet_t; -int __fenix_group_create(int, MPI_Comm, int, int, int, void*, int*); int __fenix_group_get_redundancy_policy(int, int*, int*, int*); -int __fenix_member_create(int, int, void *, int, int); int __fenix_data_wait(Fenix_Request); int __fenix_data_test(Fenix_Request, int *); -int __fenix_member_store(int, int, const DataSubset&); -int __fenix_member_storev(int, int, const DataSubset&); -int __fenix_member_istore(int, int, const DataSubset&, Fenix_Request *); -int __fenix_member_istorev(int, int, const DataSubset&, Fenix_Request *); -int __fenix_data_commit(int, int *); -int __fenix_data_commit_barrier(int, int *); int __fenix_data_barrier(int); -int __fenix_member_restore(int, int, void *, int, int, DataSubset&); -int __fenix_member_lrestore(int, int, void *, int, int, DataSubset&); int 
__fenix_member_restore_from_rank(int, int, void *, int, int, int); int __fenix_get_number_of_members(int, int *); int __fenix_get_member_at_position(int, int *, int); @@ -119,13 +109,6 @@ int __fenix_get_number_of_snapshots(int, int *); int __fenix_get_snapshot_at_position(int, int, int *); int __fenix_member_get_attribute(int, int, int, void *, int *, int); int __fenix_member_set_attribute(int, int, int, void *, int *); -int __fenix_snapshot_delete(int groupid, int timestamp); - -int __fenix_group_delete(int); -int __fenix_member_delete(int, int); - -void __fenix_init_data_recovery(); -void __fenix_init_partner_copy_recovery(); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2f67a07..b48defd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,7 +26,7 @@ set (Fenix_SOURCES fenix_data_member.cpp fenix_data_subset.cpp fenix_callbacks.cpp - globals.cpp + fenix_ext.cpp ) add_library(fenix STATIC ${Fenix_SOURCES}) diff --git a/src/fenix.cpp b/src/fenix.cpp index 7bc03be..43f8ad8 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -98,13 +98,17 @@ int Fenix_Finalize() { return FENIX_SUCCESS; } -int Fenix_Data_group_create( int group_id, MPI_Comm comm, int start_time_stamp, int depth, int policy_name, - void* policy_value, int* flag) { - return __fenix_group_create(group_id, comm, start_time_stamp, depth, policy_name, policy_value, flag); +int Fenix_Data_group_create( + int group_id, MPI_Comm comm, int start_time_stamp, int depth, int policy, + void* policy_args, int* flag +) { + return group_create( + group_id, comm, start_time_stamp, depth, policy, policy_args, flag + ); } int Fenix_Data_member_create( int group_id, int member_id, void *buffer, int count, MPI_Datatype datatype ) { - return __fenix_member_create(group_id, member_id, buffer, count, __fenix_get_size(datatype)); + return member_create(group_id, member_id, buffer, count, datatype); } int Fenix_Data_group_get_redundancy_policy( int group_id, int* policy_name, void *policy_value, int 
*flag ) { @@ -136,11 +140,11 @@ int Fenix_Data_member_istorev(int group_id, int member_id, const Fenix_Data_subs } int Fenix_Data_commit(int group_id, int *time_stamp) { - return __fenix_data_commit(group_id, time_stamp); + return commit(group_id, time_stamp); } int Fenix_Data_commit_barrier(int group_id, int *time_stamp) { - return __fenix_data_commit_barrier(group_id, time_stamp); + return commit_barrier(group_id, time_stamp); } int Fenix_Data_barrier(int group_id) { @@ -214,15 +218,15 @@ int Fenix_Data_member_attr_set(int group_id, int member_id, int attribute_name, } int Fenix_Data_snapshot_delete(int group_id, int time_stamp) { - return __fenix_snapshot_delete(group_id, time_stamp); + return snapshot_delete(group_id, time_stamp); } int Fenix_Data_group_delete(int group_id) { - return __fenix_group_delete(group_id); + return group_delete(group_id); } int Fenix_Data_member_delete(int group_id, int member_id) { - return __fenix_member_delete(group_id, member_id); + return member_delete(group_id, member_id); } int Fenix_Process_fail_list(int** fail_list){ @@ -250,11 +254,11 @@ int Fenix_Process_detect_failures(int do_recovery){ } Fenix_Rank_role Fenix_get_role(){ - return (Fenix_Rank_role) fenix.role; + return role(); } int Fenix_get_error(){ - return fenix.repair_result; + return error(); } namespace Fenix { @@ -270,49 +274,28 @@ void throw_exception(){ throw CommException(*fenix.user_world, *fenix.ret_error); } -} // namespace Fenix - -namespace Fenix::Data { - -int member_store(int group_id, int member_id, const DataSubset& subset){ - return __fenix_member_store(group_id, member_id, subset); +Fenix_Rank_role role(){ + return (Fenix_Rank_role) fenix.role; } -int member_storev(int group_id, int member_id, const DataSubset& subset){ - return __fenix_member_storev(group_id, member_id, subset); +int error(){ + return fenix.repair_result; } -int member_istore( - int group_id, int member_id, const DataSubset& subset, - Fenix_Request *request -){ - 
fatal_print("unimplemented"); - return 0; +int callback_register(std::function callback){ + return __fenix_callback_register(callback); } -int member_istorev( - int group_id, int member_id, const DataSubset& subset, - Fenix_Request *request -){ - fatal_print("unimplemented"); - return 0; +int callback_pop() { + return __fenix_callback_pop(); } -int member_restore( - int group_id, int member_id, void *target_buffer, int max_count, - int time_stamp, DataSubset& data_found -) { - data_found = {}; - return __fenix_member_restore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); +int detect_failures(bool recover){ + return __fenix_detect_failures(recover); } -int member_lrestore( - int group_id, int member_id, void *target_buffer, int max_count, - int time_stamp, DataSubset& data_found -) { - data_found = {}; - return __fenix_member_lrestore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); +bool initialized(){ + return fenix.fenix_init_flag; } -} // namespace Fenix::Data - +} // namespace Fenix diff --git a/src/fenix_data_group.cpp b/src/fenix_data_group.cpp index 197915f..ec9145f 100644 --- a/src/fenix_data_group.cpp +++ b/src/fenix_data_group.cpp @@ -110,7 +110,7 @@ fenix_data_recovery_t * __fenix_data_recovery_init() { return data_recovery; } -int __fenix_member_delete(int groupid, int memberid) { +int member_delete(int groupid, int memberid) { auto [group_index, group] = find_group(groupid); if(!group) return FENIX_ERROR_INVALID_GROUPID; @@ -173,7 +173,7 @@ int __fenix_data_recovery_remove_group(int group_index){ * @brief * @param group_id */ -int __fenix_group_delete(int groupid) { +int group_delete(int groupid) { auto [group_index, group] = find_group(groupid); if(!group) return FENIX_ERROR_INVALID_GROUPID; diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index ad2444c..01b92ff 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ 
b/src/fenix_data_policy_in_memory_raid.cpp @@ -849,10 +849,10 @@ int Group::member_restore( ); if(!found_members[set_rank]){ - __fenix_member_create( - groupid, packet.memberid, target_buffer, packet.current_count, + this->member_create(__fenix_data_member_add_entry( + this, packet.memberid, target_buffer, packet.current_count, packet.datatype_size - ); + )); member = find_member(member_id); assert(member); } diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 0f8fb7d..9875cdb 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -56,6 +56,7 @@ +#include "fenix.hpp" #include "fenix_data_recovery.hpp" #include "fenix_data_policy.hpp" #include "fenix_opt.hpp" @@ -74,7 +75,7 @@ namespace Fenix::Data { * @param time_start * @param depth */ -int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, int policy_name, +int group_create( int groupid, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag) { int retval = -1; @@ -191,7 +192,9 @@ int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* poli * @param count * @param data_type */ -int __fenix_member_create(int groupid, int memberid, void *data, int count, int datatype_size ) { +int member_create( + int groupid, int memberid, void *data, int count, MPI_Datatype datatype +) { auto [group_index, group] = find_group(groupid); if(!group) return FENIX_ERROR_INVALID_GROUPID; @@ -210,7 +213,7 @@ int __fenix_member_create(int groupid, int memberid, void *data, int count, int //First, we'll make a fenix-core member entry, then pass that info to //the specific data policy. 
- mentry = __fenix_data_member_add_entry(group, memberid, data, count, datatype_size); + mentry = __fenix_data_member_add_entry(group, memberid, data, count, __fenix_get_size(datatype)); //Pass the info along to the policy return group->member_create(mentry); @@ -270,7 +273,7 @@ int __fenix_data_test(Fenix_Request request, int *flag) { * */ -int __fenix_member_store(int groupid, int memberid, const DataSubset& specifier) { +int member_store(int groupid, int memberid, const DataSubset& specifier) { auto [group_index, group] = find_group(groupid); if(!group){ debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist", groupid); @@ -280,7 +283,7 @@ int __fenix_member_store(int groupid, int memberid, const DataSubset& specifier) return group->member_store(memberid, specifier); } -int __fenix_member_storev(int groupid, int memberid, const DataSubset& specifier) { +int member_storev(int groupid, int memberid, const DataSubset& specifier) { auto [group_index, group] = find_group(groupid); if(!group){ debug_print("ERROR Fenix_Data_member_storev: group_id <%d> does not exist", groupid); @@ -297,8 +300,22 @@ int __fenix_member_storev(int groupid, int memberid, const DataSubset& specifier * @param subset_specifier * @param request */ -int __fenix_member_istore(int groupid, int memberid, const DataSubset& specifier, +int member_istore(int groupid, int memberid, const DataSubset& specifier, Fenix_Request *request) { + fatal_print("unimplemented"); + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_istore: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; + } + + return group->member_istore(memberid, specifier, request); +} + +int member_istorev( + int groupid, int memberid, const DataSubset& specifier, Fenix_Request *request +) { + fatal_print("unimplemented"); auto [group_index, group] = find_group(groupid); if(!group){ debug_print("ERROR Fenix_Data_member_istore: group_id <%d> 
does not exist", groupid); @@ -314,7 +331,7 @@ int __fenix_member_istore(int groupid, int memberid, const DataSubset& specifier * @param group_id * @param time_stamp */ -int __fenix_data_commit(int groupid, int *timestamp) { +int commit(int groupid, int *timestamp) { /* No communication is performed */ /* Return the new timestamp */ int retval = -1; @@ -347,7 +364,7 @@ int __fenix_data_commit(int groupid, int *timestamp) { * @param group_id * @param time_stamp */ -int __fenix_data_commit_barrier(int groupid, int *timestamp) { +int commit_barrier(int groupid, int *timestamp) { int retval = -1; int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); if (fenix.options.verbose == 23) { @@ -407,8 +424,10 @@ int __fenix_data_commit_barrier(int groupid, int *timestamp) { * @param max_count * @param time_stamp */ -int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { +int member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { int retval = FENIX_SUCCESS; + data_found = {}; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); if (fenix.options.verbose == 25) { @@ -438,9 +457,10 @@ int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, * @param max_count * @param time_stamp */ -int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { - +int member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { int retval = FENIX_SUCCESS; + data_found = {}; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); int member_index = -1; @@ -676,7 +696,7 @@ int __fenix_member_set_attribute(int groupid, int memberid, int attributename, * @param group_id * @param time_stamp */ -int __fenix_snapshot_delete(int group_id, int time_stamp) { +int snapshot_delete(int group_id, int 
time_stamp) { int retval = -1; int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); if (group_index == -1) { From 75ceff077fd4983a8af71ec71368dc416816545c Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 15:04:35 -0500 Subject: [PATCH 12/21] New basic convenience functions --- include/fenix.h | 3 +++ include/fenix.hpp | 23 ++++++++++++++++++++ include/fenix_data_group.hpp | 2 ++ include/fenix_data_policy_in_memory_raid.hpp | 1 + src/fenix.cpp | 13 +++++++++++ src/fenix_data_group.cpp | 9 ++++++++ src/fenix_data_policy_in_memory_raid.cpp | 4 ++++ src/fenix_data_recovery.cpp | 12 ++++++++++ 8 files changed, 67 insertions(+) diff --git a/include/fenix.h b/include/fenix.h index 416bfb0..9c0c71e 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -300,6 +300,9 @@ Fenix_Rank_role Fenix_get_role(); //!@brief Returns the error value from Fenix_Init or the latest recovery int Fenix_get_error(); +//!@brief Returns the number of spare ranks currently available to Fenix +int Fenix_get_nspare(); + /** * @brief Get the list of ranks that failed in the most recent failure. * @param[out] fail_list Set to a list of failed ranks. 
diff --git a/include/fenix.hpp b/include/fenix.hpp index 5f26533..795c6a8 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -60,6 +60,8 @@ #include #include +#include +#include #include "fenix.h" #include "fenix_exception.hpp" #include "fenix_data_subset.hpp" @@ -116,12 +118,21 @@ Fenix_Rank_role role(); //!@brief Overload of #Fenix_get_error int error(); +//!@brief Overload of #Fenix_get_nspare +int nspare(); + //!@brief Overload of #Fenix_Callback_register int callback_register(std::function callback); //@!brief Overload of #Fenix_Callback_pop int callback_pop(); +/** + * @brief Get the failed ranks from the most recent recovery + * @return vector of failed ranks + */ +std::vector fail_list(); + //!@brief Overload of #Fenix_Process_detect_failures int detect_failures(bool recover = true); @@ -183,6 +194,18 @@ int commit(int group_id, int* time_stamp = nullptr); //@!brief overload of #Fenix_Data_commit int commit_barrier(int group_id, int* time_stamp = nullptr); +/** + * @brief get the members of a group + * @return vector of member IDs of each member in group_id if group exists + */ +std::optional> group_members(int group_id); + +/** + * @brief get the snapshots of a group + * @return vector of timestamps of each snapshot in group_id if group exists + */ +std::optional> group_snapshots(int group_id); + //@!brief Overload of #Fenix_Data_snapshot_delete int snapshot_delete(int group_id, int timestamp); diff --git a/include/fenix_data_group.hpp b/include/fenix_data_group.hpp index 90f7691..0e84b01 100644 --- a/include/fenix_data_group.hpp +++ b/include/fenix_data_group.hpp @@ -84,6 +84,7 @@ struct fenix_group_t { int policy_name; std::map members; + std::vector get_member_ids(); //Search for id, returning {-1, nullptr} if not found. Fenix::Data::member_iterator search_member(int id); //As search_member, but print an error message if id not found. 
@@ -105,6 +106,7 @@ struct fenix_group_t { virtual int member_restore_from_rank(int member_id, void* target_bugger, int max, int timestamp, int source_rank) = 0; virtual int get_number_of_snapshots(int* num) = 0; virtual int get_snapshot_at_position(int position, int* timestamp) = 0; + virtual std::vector get_snapshots() = 0; virtual int reinit(int* flag) = 0; virtual int member_get_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag, int sourcerank) = 0; virtual int member_set_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag) = 0; diff --git a/include/fenix_data_policy_in_memory_raid.hpp b/include/fenix_data_policy_in_memory_raid.hpp index 8063cbf..02759ed 100644 --- a/include/fenix_data_policy_in_memory_raid.hpp +++ b/include/fenix_data_policy_in_memory_raid.hpp @@ -226,6 +226,7 @@ struct Group : public fenix_group_t { int get_number_of_snapshots(int* number_of_snapshots) override; int get_snapshot_at_position(int position, int* timestamp) override; + std::vector get_snapshots(); int reinit(int* flag) override; }; diff --git a/src/fenix.cpp b/src/fenix.cpp index 43f8ad8..b200c06 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -261,6 +261,10 @@ int Fenix_get_error(){ return error(); } +int Fenix_get_nspare(){ + return nspare(); +} + namespace Fenix { void init(const Args::FenixInitArgs args){ @@ -282,6 +286,10 @@ int error(){ return fenix.repair_result; } +int nspare(){ + return fenix.spare_ranks; +} + int callback_register(std::function callback){ return __fenix_callback_register(callback); } @@ -290,6 +298,11 @@ int callback_pop() { return __fenix_callback_pop(); } +std::vector fail_list(){ + if(fenix.fail_world_size == 0) return {}; + return {fenix.fail_world, fenix.fail_world+fenix.fail_world_size}; +} + int detect_failures(bool recover){ return __fenix_detect_failures(recover); } diff --git a/src/fenix_data_group.cpp b/src/fenix_data_group.cpp index ec9145f..928dbe1 100644 --- a/src/fenix_data_group.cpp +++ 
b/src/fenix_data_group.cpp @@ -92,6 +92,15 @@ member_iterator fenix_group_t::find_member(int id){ return it; } +std::vector fenix_group_t::get_member_ids(){ + std::vector ret; + ret.reserve(members.size()); + for(const auto& [k, v] : members){ + ret.push_back(k); + } + return ret; +} + fenix_data_recovery_t * __fenix_data_recovery_init() { fenix_data_recovery_t *data_recovery = (fenix_data_recovery_t *) s_calloc(1, sizeof(fenix_data_recovery_t)); diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 01b92ff..a08697c 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -796,6 +796,10 @@ int Group::get_snapshot_at_position(int idx, int* snapshot){ return FENIX_SUCCESS; } +std::vector Group::get_snapshots(){ + return {timestamps.begin(), timestamps.end()}; +} + int Group::member_restore( int member_id, void* target_buffer, int max_count, int ts, DataSubset& data_found diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 9875cdb..1ef833b 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -556,6 +556,18 @@ int __fenix_get_member_at_position(int group_id, int *member_id, int position) { return FENIX_SUCCESS; } +std::optional> group_members(int group_id){ + auto [group_index, group] = find_group(group_id); + if(!group) return {}; + return group->get_member_ids(); +} + +std::optional> group_snapshots(int group_id){ + auto [group_index, group] = find_group(group_id); + if(!group) return {}; + return group->get_snapshots(); +} + /** * @brief * @param group_id From 9716e22d58e8a090e3b67daecae21bf211eff40b Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 15:06:07 -0500 Subject: [PATCH 13/21] Small bugfix --- src/fenix_data_policy_in_memory_raid.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 
a08697c..1f192a7 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -530,7 +530,7 @@ int Member::lrestore( int begin = end > 0 ? end-1 : end; if(max_restore != 0){ - for(int i = end-1; i >= 0 && !recovered.includes_all(max_restore) ; i--){ + for(int i = end-1; i >= 0 && !recovered.includes_all(max_restore-1) ; i--){ if(entries[i].timestamp < 0) break; begin = i; recovered += entries[i].region; @@ -545,7 +545,7 @@ int Member::lrestore( } if(end <= 0) return FENIX_ERROR_NODATA_FOUND; - if(max_restore != 0 && !recovered.includes_all(max_restore)) + if(max_restore != 0 && !recovered.includes_all(max_restore-1)) return FENIX_WARNING_PARTIAL_RESTORE; return FENIX_SUCCESS; } From d26926671bc942ec9269d7cadcb0a5144838f12c Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 15:08:49 -0500 Subject: [PATCH 14/21] Allow user to invoke callbacks, better define callback behavior when nested faults occur --- include/fenix.h | 5 +++++ include/fenix.hpp | 29 +++++++++++++++++++---------- include/fenix_ext.hpp | 2 ++ include/fenix_process_recovery.hpp | 2 +- src/fenix.cpp | 8 ++++++++ src/fenix_callbacks.cpp | 29 +++++++++++++++++++++++++---- src/fenix_process_recovery.cpp | 14 ++++++++------ 7 files changed, 68 insertions(+), 21 deletions(-) diff --git a/include/fenix.h b/include/fenix.h index 9c0c71e..1a0c626 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -279,6 +279,11 @@ int Fenix_Callback_register(void (*recover)(MPI_Comm, int, void *), */ int Fenix_Callback_pop(); +/** + * @brief Invoke all callbacks with information from the last recovered fault + */ +void Fenix_Callback_invoke_all(); + /** * @brief Check for any failed ranks * diff --git a/include/fenix.hpp b/include/fenix.hpp index 795c6a8..85acfca 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -87,6 +87,11 @@ constexpr ResumeMode JUMP = FENIX_RESUME_JUMP; constexpr ResumeMode RETURN = FENIX_RESUME_RETURN; constexpr ResumeMode THROW = 
FENIX_RESUME_THROW; +enum CallbackExceptionMode { + RETHROW, + SQUASH +}; + using UnhandledMode = Fenix_Unhandled_mode; constexpr UnhandledMode SILENT = FENIX_UNHANDLED_SILENT; constexpr UnhandledMode PRINT = FENIX_UNHANDLED_PRINT; @@ -94,16 +99,17 @@ constexpr UnhandledMode ABORT = FENIX_UNHANDLED_ABORT; namespace Args { struct FenixInitArgs { - int* role = nullptr; - MPI_Comm in_comm = MPI_COMM_WORLD; - MPI_Comm* out_comm = nullptr; - int* argc = nullptr; - char*** argv = nullptr; - int spares = 0; - int spawn = 0; - ResumeMode resume_mode = THROW; - UnhandledMode unhandled_mode = ABORT; - int* err = nullptr; + int* role = nullptr; + MPI_Comm in_comm = MPI_COMM_WORLD; + MPI_Comm* out_comm = nullptr; + int* argc = nullptr; + char*** argv = nullptr; + int spares = 0; + int spawn = 0; + ResumeMode resume_mode = THROW; + CallbackExceptionMode callback_exception_mode = RETHROW; + UnhandledMode unhandled_mode = ABORT; + int* err = nullptr; }; } @@ -127,6 +133,9 @@ int callback_register(std::function callback); //@!brief Overload of #Fenix_Callback_pop int callback_pop(); +//@!brief Overload of #Fenix_Callback_invoke_all +void callback_invoke_all(); + /** * @brief Get the failed ranks from the most recent recovery * @return vector of failed ranks diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index c95922e..6066e01 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -74,11 +74,13 @@ typedef struct { int spare_ranks; // Spare ranks entered by user to repair failed ranks ResumeMode resume_mode = JUMP; + CallbackExceptionMode callback_exception_mode = RETHROW; UnhandledMode unhandled_mode = ABORT; int ignore_errs = false; // Temporarily ignore all errors & recovery int spawn_policy; // Indicate dynamic process spawning jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure + int mpi_fail_code = MPI_SUCCESS; int repair_result = FENIX_SUCCESS; // Internal variable to store the result of MPI comm repair int role = 
FENIX_ROLE_INITIAL_RANK; diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index bfcced3..c2f26b3 100644 --- a/include/fenix_process_recovery.hpp +++ b/include/fenix_process_recovery.hpp @@ -98,7 +98,7 @@ int __fenix_callback_register(fenix_callback_func& recover); int __fenix_callback_pop(); -void __fenix_callback_invoke_all(int error); +void __fenix_callback_invoke_all(); int* __fenix_get_fail_ranks(int *, int, int); diff --git a/src/fenix.cpp b/src/fenix.cpp index b200c06..1bbc058 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -88,6 +88,10 @@ int Fenix_Callback_pop() { return __fenix_callback_pop(); } +void Fenix_Callback_invoke_all() { + __fenix_callback_invoke_all(); +} + int Fenix_Initialized(int *flag) { *flag = (fenix.fenix_init_flag) ? 1 : 0; return FENIX_SUCCESS; @@ -298,6 +302,10 @@ int callback_pop() { return __fenix_callback_pop(); } +void callback_invoke_all() { + __fenix_callback_invoke_all(); +} + std::vector fail_list(){ if(fenix.fail_world_size == 0) return {}; return {fenix.fail_world, fenix.fail_world+fenix.fail_world_size}; diff --git a/src/fenix_callbacks.cpp b/src/fenix_callbacks.cpp index 5f981ba..982db02 100644 --- a/src/fenix_callbacks.cpp +++ b/src/fenix_callbacks.cpp @@ -62,8 +62,10 @@ #include "fenix_data_recovery.hpp" #include "fenix_opt.hpp" #include "fenix_util.hpp" +#include "fenix_exception.hpp" #include +using namespace Fenix; int __fenix_callback_register(fenix_callback_func& recover) { @@ -83,9 +85,28 @@ int __fenix_callback_pop(){ return FENIX_SUCCESS; } -void __fenix_callback_invoke_all(int error) -{ - for(auto it = fenix.callbacks.rbegin(); it != fenix.callbacks.rend(); it++){ - (*it)(*fenix.user_world, error); +void __fenix_callback_invoke_all(){ + //If callbacks are invoked in a nested manner due to caught exceptions + //within a callback, we want to only finish the most recent call. All prior + //calls should exit as soon as control returns. 
+ static int callbacks_depth = 0; + int m_callbacks_layer = callbacks_depth++; + + try { + for(auto& cb : fenix.callbacks) { + if(callbacks_depth != m_callbacks_layer+1) break; + cb(*fenix.user_world, fenix.mpi_fail_code); + } + } catch (const CommException& e) { + switch(fenix.callback_exception_mode){ + case(RETHROW): + if(m_callbacks_layer == 0) callbacks_depth = 0; + throw; + case(SQUASH): + break; + } } + + //Reset the callback depth when leaving the outermost call + if(m_callbacks_layer == 0) callbacks_depth = 0; } diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 69def07..8de0629 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -112,6 +112,7 @@ int fenix_preinit(const Args::FenixInitArgs& args, jmp_buf* jump_env){ fenix.spawn_policy = args.spawn; fenix.recover_environment = jump_env; fenix.resume_mode = args.resume_mode; + fenix.callback_exception_mode = args.callback_exception_mode; fenix.unhandled_mode = args.unhandled_mode; fenix.ret_role = args.role ? args.role : &fenix.role; fenix.ret_error = args.err ? args.err : &fenix.repair_result; @@ -663,9 +664,10 @@ void __fenix_postinit() 34095347, fenix.new_world, &fenix.check_failures_req); } - if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { - __fenix_callback_invoke_all(*fenix.ret_error); + if(fenix.role != FENIX_ROLE_INITIAL_RANK) { + __fenix_callback_invoke_all(); } + if (fenix.options.verbose == 9) { verbose_print("After barrier. current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role); @@ -798,17 +800,17 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) 
{ int ret_repair; int index; - int ret = *pret; + fenix.mpi_fail_code = *pret; + if(!fenix.fenix_init_flag || __fenix_spare_rank() == 1 || fenix.ignore_errs) { return; } - switch (ret) { + switch (fenix.mpi_fail_code) { case MPI_ERR_PROC_FAILED_PENDING: case MPI_ERR_PROC_FAILED: MPIX_Comm_revoke(*fenix.world); MPIX_Comm_revoke(fenix.new_world); - if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); fenix.repair_result = __fenix_repair_ranks(); @@ -819,7 +821,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) default: int len; char errstr[MPI_MAX_ERROR_STRING]; - MPI_Error_string(ret, errstr, &len); + MPI_Error_string(fenix.mpi_fail_code, errstr, &len); switch (fenix.unhandled_mode) { case ABORT: fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); From 754192ae5f10a72fdf0899203b80c6c306710e70 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 8 Jul 2025 15:13:41 -0500 Subject: [PATCH 15/21] Unconditionally reorder processes Solves bug when recovering with no spare ranks - no fail_list is allocated, causing segfaults when attempting to use it --- src/fenix_process_recovery.cpp | 170 ++++++++++++++++----------------- 1 file changed, 80 insertions(+), 90 deletions(-) diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index 8de0629..b563b49 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -359,121 +359,111 @@ int __fenix_repair_ranks() rt_code = FENIX_WARNING_SPARE_RANKS_DEPLETED; - if (fenix.spare_ranks != 0) { + /***************************************/ + /* Fill the ranks in increasing order */ + /***************************************/ - /***************************************/ - /* Fill the ranks in increasing order */ - /***************************************/ + int active_ranks; - int active_ranks; + survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); - survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); + ret = PMPI_Allgather(&current_rank, 1,
MPI_INT, survivor_world, 1, MPI_INT, + world_without_failures); - ret = PMPI_Allgather(&current_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - world_without_failures); - - if (fenix.options.verbose == 2) { - int index; - for (index = 0; index < survivor_world_size; index++) { - verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", - current_rank, fenix.role, index, - survivor_world[index]); - } + if (fenix.options.verbose == 2) { + int index; + for (index = 0; index < survivor_world_size; index++) { + verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", + current_rank, fenix.role, index, + survivor_world[index]); } + } - //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } - if (ret != MPI_SUCCESS) { - repair_success = 0; - if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); - } - MPI_Comm_free(&world_without_failures); - free(survivor_world); - goto END_LOOP; + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } + if (ret != MPI_SUCCESS) { + repair_success = 0; + if (ret == MPI_ERR_PROC_FAILED) { + MPIX_Comm_revoke(world_without_failures); } + MPI_Comm_free(&world_without_failures); + free(survivor_world); + goto END_LOOP; + } - survived_flag = 0; - if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { - survived_flag = 1; - } + survived_flag = 0; + if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { + survived_flag = 1; + } - ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, world_without_failures); + ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, + MPI_INT, MPI_SUM, world_without_failures); - //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce.
repair_ranks\n"); } - if (ret != MPI_SUCCESS) { - repair_success = 0; - if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); - } - MPI_Comm_free(&world_without_failures); - free(survivor_world); - goto END_LOOP; + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } + if (ret != MPI_SUCCESS) { + repair_success = 0; + if (ret == MPI_ERR_PROC_FAILED) { + MPIX_Comm_revoke(world_without_failures); } + MPI_Comm_free(&world_without_failures); + free(survivor_world); + goto END_LOOP; + } - fenix.num_inital_ranks = 0; + fenix.num_inital_ranks = 0; - /* recovered ranks must be the number of spare ranks */ - fenix.num_recovered_ranks = fenix.fail_world_size; + /* recovered ranks must be the number of spare ranks */ + fenix.num_recovered_ranks = fenix.fail_world_size; - if (fenix.options.verbose == 2) { - verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", - current_rank, fenix.role, - fenix.num_recovered_ranks); - } - - if(fenix.role != FENIX_ROLE_INITIAL_RANK){ - free(fenix.fail_world); - } - fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, - fenix.fail_world_size); + if (fenix.options.verbose == 2) { + verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", + current_rank, fenix.role, + fenix.num_recovered_ranks); + } - if (fenix.options.verbose == 2) { - int index; - for (index = 0; index < fenix.fail_world_size; index++) { - verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); - } + if(fenix.role != FENIX_ROLE_INITIAL_RANK){ + free(fenix.fail_world); + } + fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, + fenix.fail_world_size); + + if (fenix.options.verbose == 2) { + int index; + for (index = 0; index < fenix.fail_world_size; index++) { + verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); } + } - free(survivor_world); + free(survivor_world); - active_ranks = world_size - fenix.spare_ranks; + 
active_ranks = world_size - fenix.spare_ranks; - if (fenix.options.verbose == 2) { - verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - current_rank, fenix.role, - active_ranks); - } + if (fenix.options.verbose == 2) { + verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", + current_rank, fenix.role, + active_ranks); + } - /* Assign new rank for reordering */ - if (current_rank >= active_ranks) { // reorder ranks - int rank_offset = ((world_size - 1) - current_rank); - - for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ - if(fenix.fail_world[fail_i] > current_rank) rank_offset--; - } + /* Assign new rank for reordering */ + if (current_rank >= active_ranks) { // reorder ranks + int rank_offset = ((world_size - 1) - current_rank); - if (rank_offset < fenix.fail_world_size) { - if (fenix.options.verbose == 11) { - verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", - current_rank, fenix.fail_world[rank_offset]); - } - current_rank = fenix.fail_world[rank_offset]; - } + for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ + if(fenix.fail_world[fail_i] > current_rank) rank_offset--; } - /************************************/ - /* Update the number of spare ranks */ - /************************************/ - fenix.spare_ranks = 0; - - //debug_print("not enough spare ranks to repair rank failures. 
repair_ranks\n"); + if (rank_offset < fenix.fail_world_size) { + if (fenix.options.verbose == 11) { + verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", + current_rank, fenix.fail_world[rank_offset]); + } + current_rank = fenix.fail_world[rank_offset]; + } } - /****************************************************************/ - /* No rank reordering is required if no spare rank is available */ - /****************************************************************/ - + /************************************/ + /* Update the number of spare ranks */ + /************************************/ + fenix.spare_ranks = 0; } } else { From 50f0a3fdeb6abaefc041b74a36ee1758ca2f7016 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 15 Jul 2025 13:49:13 -0600 Subject: [PATCH 16/21] Bugfix --- src/fenix_data_policy_in_memory_raid.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index 1f192a7..aa62c67 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -400,7 +400,9 @@ int BuddyMember::restore_impl(){ //Fetch data int p_count = e->partner_region.count(e->elm_max_count-1); recv_buf.recv(p_count*e->elm_size, left, 0, group.set_comm); - e->region.deserialize_data(e->elm_size, recv_buf, e->partner_buf); + e->partner_region.deserialize_data( + e->elm_size, recv_buf, e->partner_buf + ); //Only update timestamp after all other data updated, to indicate //recovery of this snapshot completed From d58f1d55961cb7c0643dfe927df574fc03c4502d Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 16 Oct 2025 09:56:41 -0500 Subject: [PATCH 17/21] Fix issues with FENIX_ constants --- examples/05_subset_create/subset_create.c | 2 +- examples/07_resizeable_member/resizeable.cpp | 4 ++-- include/fenix.h | 5 ++--- src/fenix_data_policy_in_memory_raid.cpp | 2 +- test/storev/storev.cpp | 4 ++-- 5 files changed, 8 
insertions(+), 9 deletions(-) diff --git a/examples/05_subset_create/subset_create.c b/examples/05_subset_create/subset_create.c index 23c15ca..7315d71 100644 --- a/examples/05_subset_create/subset_create.c +++ b/examples/05_subset_create/subset_create.c @@ -139,7 +139,7 @@ int main(int argc, char **argv) { subset[index] = -2; } - int restore_ret = Fenix_Data_member_restore(my_group, my_member, subset, kCount, FENIX_TIME_STAMP_MAX, NULL); + int restore_ret = Fenix_Data_member_restore(my_group, my_member, subset, kCount, FENIX_DATA_SNAPSHOT_LATEST, NULL); if(restore_ret != FENIX_SUCCESS){ fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, restore_ret); diff --git a/examples/07_resizeable_member/resizeable.cpp b/examples/07_resizeable_member/resizeable.cpp index 98b3697..a18faaa 100644 --- a/examples/07_resizeable_member/resizeable.cpp +++ b/examples/07_resizeable_member/resizeable.cpp @@ -158,7 +158,7 @@ int main(int argc, char **argv) { //Do a null restore to get information about the stored subset DataSubset stored_subset; int ret = member_restore( - my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + my_group, my_member, nullptr, 0, FENIX_DATA_SNAPSHOT_LATEST, stored_subset ); if(ret != FENIX_SUCCESS) { fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); @@ -173,7 +173,7 @@ int main(int argc, char **argv) { //Now do an lrestore to get the recovered data. 
ret = member_lrestore( - my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + my_group, my_member, data.data(), data.size(), FENIX_DATA_SNAPSHOT_LATEST, stored_subset ); diff --git a/include/fenix.h b/include/fenix.h index 1a0c626..f1addd6 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -357,14 +357,13 @@ int Fenix_Finalize(); */ #define FENIX_DATA_GROUP_WORLD_ID 10 #define FENIX_GROUP_ID_MAX 11 -#define FENIX_TIME_STAMP_MAX 12 -#define FENIX_DATA_MEMBER_ALL 15 +#define FENIX_DATA_MEMBER_ALL -1 #define FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER 11 #define FENIX_DATA_MEMBER_ATTRIBUTE_COUNT 12 #define FENIX_DATA_MEMBER_ATTRIBUTE_DATATYPE 13 #define FENIX_DATA_MEMBER_ATTRIBUTE_SIZE 14 #define FENIX_DATA_SNAPSHOT_LATEST -1 -#define FENIX_DATA_SNAPSHOT_ALL 16 +#define FENIX_DATA_SNAPSHOT_ALL -2 #define FENIX_RESIZEABLE 0 #define FENIX_DATA_SUBSET_CREATED 2 diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index aa62c67..c30b1a7 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -517,7 +517,7 @@ int Member::lrestore( entries.back().reset(); int end = 0; - if(timestamp == FENIX_TIME_STAMP_MAX){ + if(timestamp == FENIX_DATA_SNAPSHOT_LATEST){ if(entries[entries.size()-2].timestamp >= 0){ end = entries.size()-1; } diff --git a/test/storev/storev.cpp b/test/storev/storev.cpp index 991741a..d5af86e 100644 --- a/test/storev/storev.cpp +++ b/test/storev/storev.cpp @@ -156,7 +156,7 @@ int main(int argc, char **argv) { //Do a null restore to get information about the stored subset DataSubset stored_subset; int ret = member_restore( - my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + my_group, my_member, nullptr, 0, FENIX_DATA_SNAPSHOT_LATEST, stored_subset ); if(ret != FENIX_SUCCESS) { fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); @@ -171,7 +171,7 @@ int main(int argc, char **argv) { //Now do an lrestore to get the recovered 
data. ret = member_lrestore( - my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + my_group, my_member, data.data(), data.size(), FENIX_DATA_SNAPSHOT_LATEST, stored_subset ); From 1991c9524cfa4c6d2690862eb894a1cf1c30d2ad Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 16 Oct 2025 10:04:41 -0500 Subject: [PATCH 18/21] Small bugfixes --- include/fenix_ext.hpp | 8 ++++---- src/fenix_data_recovery.cpp | 1 + src/fenix_data_subset.cpp | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 6066e01..9dd4d24 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -72,7 +72,7 @@ typedef struct { int num_survivor_ranks = 0; // Keeps the global information on the number of survived MPI ranks after failure int num_recovered_ranks = 0; // Keeps the number of spare ranks brought into MPI communicator recovery int spare_ranks; // Spare ranks entered by user to repair failed ranks - + ResumeMode resume_mode = JUMP; CallbackExceptionMode callback_exception_mode = RETHROW; UnhandledMode unhandled_mode = ABORT; @@ -101,15 +101,15 @@ typedef struct { MPI_Comm *user_world; // User-facing comm with repaired ranks and no spares MPI_Comm new_world; // Internal duplicate of user_world int new_world_exists = false, user_world_exists = false; - + //Values used for Fenix_Process_detect_failures int dummy_recv_buffer; MPI_Request check_failures_req; - + MPI_Op agree_op; // Global agreement call for Fenix data recovery API MPI_Errhandler mpi_errhandler; // Our custom error handler - Fenix::Data::fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure + Fenix::Data::fenix_data_recovery_t *data_recovery = nullptr; } fenix_t; } diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 1ef833b..7261878 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -135,6 +135,7 @@ int group_create( int groupid, MPI_Comm 
comm, int timestart, int depth, int poli group->timestamp = -1; //indicates no commits yet group->depth = depth; group->comm = comm; + group->policy_name = policy_name; MPI_Comm_rank(comm, &(group->current_rank)); diff --git a/src/fenix_data_subset.cpp b/src/fenix_data_subset.cpp index b88e223..317b569 100644 --- a/src/fenix_data_subset.cpp +++ b/src/fenix_data_subset.cpp @@ -698,7 +698,7 @@ void DataSubset::deserialize_data( fenix_assert(dst.size()%elm_size==0); size_t max_elm = dst.size()/elm_size - 1; - if(max_elm == 0){ + if(max_elm == MAX){ max_elm = end(); fenix_assert(max_elm != MAX); dst.resize((max_elm+1)*elm_size); From 11886e59aedc72b0a66245586f74f24abe4eeba9 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 16 Oct 2025 10:09:17 -0500 Subject: [PATCH 19/21] Add pre-recovery callbacks --- include/fenix.hpp | 11 ++++++++--- include/fenix_ext.hpp | 4 +++- include/fenix_process_recovery.hpp | 8 +++++--- src/fenix.cpp | 12 ++++++------ src/fenix_callbacks.cpp | 18 +++++++++++------- src/fenix_process_recovery.cpp | 2 ++ 6 files changed, 35 insertions(+), 20 deletions(-) diff --git a/include/fenix.hpp b/include/fenix.hpp index 85acfca..46b826c 100644 --- a/include/fenix.hpp +++ b/include/fenix.hpp @@ -97,6 +97,11 @@ constexpr UnhandledMode SILENT = FENIX_UNHANDLED_SILENT; constexpr UnhandledMode PRINT = FENIX_UNHANDLED_PRINT; constexpr UnhandledMode ABORT = FENIX_UNHANDLED_ABORT; +enum CallbackLocation { + PRE_RECOVERY, + POST_RECOVERY +}; + namespace Args { struct FenixInitArgs { int* role = nullptr; @@ -128,13 +133,13 @@ int error(); int nspare(); //!@brief Overload of #Fenix_Callback_register -int callback_register(std::function callback); +int callback_register(std::function callback, CallbackLocation loc = POST_RECOVERY); //@!brief Overload of #Fenix_Callback_pop -int callback_pop(); +int callback_pop(CallbackLocation loc = POST_RECOVERY); //@!brief Overload of #Fenix_Callback_invoke_all -void callback_invoke_all(); +void 
callback_invoke_all(CallbackLocation loc = POST_RECOVERY); /** * @brief Get the failed ranks from the most recent recovery diff --git a/include/fenix_ext.hpp b/include/fenix_ext.hpp index 9dd4d24..4cd4514 100644 --- a/include/fenix_ext.hpp +++ b/include/fenix_ext.hpp @@ -94,7 +94,9 @@ typedef struct { int *ret_role = nullptr; int *ret_error = nullptr; - std::vector callbacks; + std::unordered_map< + CallbackLocation, std::vector + > callbacks; fenix_debug_opt_t options; // This is reserved to store the user options MPI_Comm *world; // Duplicate of comm provided by user diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index c2f26b3..e116f78 100644 --- a/include/fenix_process_recovery.hpp +++ b/include/fenix_process_recovery.hpp @@ -94,11 +94,13 @@ int __fenix_create_new_world(); int __fenix_repair_ranks(); -int __fenix_callback_register(fenix_callback_func& recover); +int __fenix_callback_register( + fenix_callback_func& recover, Fenix::CallbackLocation loc = Fenix::POST_RECOVERY +); -int __fenix_callback_pop(); +int __fenix_callback_pop(Fenix::CallbackLocation loc = Fenix::POST_RECOVERY); -void __fenix_callback_invoke_all(); +void __fenix_callback_invoke_all(Fenix::CallbackLocation loc = Fenix::POST_RECOVERY); int* __fenix_get_fail_ranks(int *, int, int); diff --git a/src/fenix.cpp b/src/fenix.cpp index 1bbc058..c6bda80 100644 --- a/src/fenix.cpp +++ b/src/fenix.cpp @@ -294,16 +294,16 @@ int nspare(){ return fenix.spare_ranks; } -int callback_register(std::function callback){ - return __fenix_callback_register(callback); +int callback_register(std::function callback, CallbackLocation loc){ + return __fenix_callback_register(callback, loc); } -int callback_pop() { - return __fenix_callback_pop(); +int callback_pop(CallbackLocation loc) { + return __fenix_callback_pop(loc); } -void callback_invoke_all() { - __fenix_callback_invoke_all(); +void callback_invoke_all(CallbackLocation loc) { + __fenix_callback_invoke_all(loc); } 
std::vector fail_list(){ diff --git a/src/fenix_callbacks.cpp b/src/fenix_callbacks.cpp index 982db02..70cb63b 100644 --- a/src/fenix_callbacks.cpp +++ b/src/fenix_callbacks.cpp @@ -67,25 +67,29 @@ using namespace Fenix; -int __fenix_callback_register(fenix_callback_func& recover) +static std::vector& callbacks(CallbackLocation loc){ + return fenix.callbacks.try_emplace(loc).first->second; +} + +int __fenix_callback_register(fenix_callback_func& recover, CallbackLocation loc) { if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; - fenix.callbacks.push_back(recover); + callbacks(loc).push_back(recover); return FENIX_SUCCESS; } -int __fenix_callback_pop(){ +int __fenix_callback_pop(CallbackLocation loc){ if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; - if(fenix.callbacks.empty()) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; + if(callbacks(loc).empty()) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; - fenix.callbacks.pop_back(); + callbacks(loc).pop_back(); return FENIX_SUCCESS; } -void __fenix_callback_invoke_all(){ +void __fenix_callback_invoke_all(CallbackLocation loc){ //If callbacks are invoked in a nested manner due to caught exceptions //within a callback, we want to only finish the most recent call. All prior //calls should exit as soon as control returns. @@ -93,7 +97,7 @@ void __fenix_callback_invoke_all(){ int m_callbacks_layer = callbacks_depth++; try { - for(auto& cb : fenix.callbacks) { + for(auto& cb : callbacks(loc)) { if(callbacks_depth != m_callbacks_layer+1) break; cb(*fenix.user_world, fenix.mpi_fail_code); } diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index b563b49..c261ecb 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -799,6 +799,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) 
switch (fenix.mpi_fail_code) { case MPI_ERR_PROC_FAILED_PENDING: case MPI_ERR_PROC_FAILED: + __fenix_callback_invoke_all(Fenix::PRE_RECOVERY); MPIX_Comm_revoke(*fenix.world); MPIX_Comm_revoke(fenix.new_world); if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); @@ -806,6 +807,7 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) fenix.repair_result = __fenix_repair_ranks(); break; case MPI_ERR_REVOKED: + __fenix_callback_invoke_all(Fenix::PRE_RECOVERY); fenix.repair_result = __fenix_repair_ranks(); break; default: From f22c88ded8170fd7267acfee23f0af34e0368366 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Thu, 16 Oct 2025 10:09:54 -0500 Subject: [PATCH 20/21] Revoke internal comms for IMR policy --- include/fenix_data_policy_in_memory_raid.hpp | 1 + src/fenix_data_policy_in_memory_raid.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/fenix_data_policy_in_memory_raid.hpp b/include/fenix_data_policy_in_memory_raid.hpp index 02759ed..6aa92b6 100644 --- a/include/fenix_data_policy_in_memory_raid.hpp +++ b/include/fenix_data_policy_in_memory_raid.hpp @@ -173,6 +173,7 @@ struct Group : public fenix_group_t { MPI_Comm set_comm = MPI_COMM_NULL; int set_size, set_rank; + static inline bool set_comm_revoke_callback = false; std::map> member_data; std::deque timestamps; diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index c30b1a7..c4a23b5 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -64,6 +64,7 @@ #include #include +#include #include "fenix.h" #include "fenix_ext.hpp" #include "fenix_opt.hpp" @@ -669,6 +670,25 @@ void Group::build_set_comm(){ MPI_Comm_size(set_comm, &set_size); MPI_Comm_rank(set_comm, &set_rank); + + if(!set_comm_revoke_callback){ + //TODO: This isn't great, and doesn't work w/ fenix restarts + // (ie finalize then init), we need a better way to refer to callbacks + // than just push/pop. 
Maybe a push/pop stack and an add/del map? + set_comm_revoke_callback = true; + Fenix::callback_register([](MPI_Comm, int){ + auto groups = fenix.data_recovery; + if(NULL == groups) return; + for(int i = 0; i < groups->count; i++){ + auto g = groups->group[i]; + if(g->policy_name != FENIX_DATA_POLICY_IMR) continue; + auto imr_g = static_cast(g); + + if(imr_g->set_comm == MPI_COMM_NULL) continue; + MPIX_Comm_revoke(imr_g->set_comm); + } + }, PRE_RECOVERY); + } } Member* Group::find_member(int memberid){ From 5e2f1bc12ae6ee8a796706c0b3980d5f9d4c5dd0 Mon Sep 17 00:00:00 2001 From: Matthew Whitlock Date: Tue, 21 Oct 2025 12:53:14 -0500 Subject: [PATCH 21/21] Support building on mpich --- include/fenix_process_recovery.hpp | 10 +++++++ src/fenix_data_policy_in_memory_raid.cpp | 3 +++ src/fenix_data_recovery.cpp | 2 ++ src/fenix_process_recovery.cpp | 26 ++++++++++++++----- test/exception_throw/CMakeLists.txt | 1 + test/failed_spares/CMakeLists.txt | 4 ++- .../fenix_request_tracking_test.c | 5 ++-- test/storev/CMakeLists.txt | 1 + test/subset/CMakeLists.txt | 4 +++ 9 files changed, 46 insertions(+), 10 deletions(-) diff --git a/include/fenix_process_recovery.hpp b/include/fenix_process_recovery.hpp index e116f78..351b09c 100644 --- a/include/fenix_process_recovery.hpp +++ b/include/fenix_process_recovery.hpp @@ -57,6 +57,16 @@ #ifndef __FENIX_PROCESS_RECOVERY__ #define __FENIX_PROCESS_RECOVERY__ +#if defined(MPIX_ERR_PROC_FAILED) && ! defined(MPI_ERR_PROC_FAILED) +#define MPI_ERR_PROC_FAILED MPIX_ERR_PROC_FAILED +#endif +#if defined(MPIX_ERR_PROC_FAILED_PENDING) && ! defined(MPI_ERR_PROC_FAILED_PENDING) +#define MPI_ERR_PROC_FAILED_PENDING MPIX_ERR_PROC_FAILED_PENDING +#endif +#if defined(MPIX_ERR_REVOKED) && ! 
defined(MPI_ERR_REVOKED) +#define MPI_ERR_REVOKED MPIX_ERR_REVOKED +#endif + #include #include #include diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp index c4a23b5..d2f35f4 100644 --- a/src/fenix_data_policy_in_memory_raid.cpp +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -64,7 +64,10 @@ #include #include +#ifndef MPICH_VERSION #include +#endif + #include "fenix.h" #include "fenix_ext.hpp" #include "fenix_opt.hpp" diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp index 7261878..429719d 100644 --- a/src/fenix_data_recovery.cpp +++ b/src/fenix_data_recovery.cpp @@ -64,7 +64,9 @@ #include "fenix_ext.hpp" #include "fenix_data_subset.hpp" +#ifndef MPICH_VERSION #include +#endif namespace Fenix::Data { diff --git a/src/fenix_process_recovery.cpp b/src/fenix_process_recovery.cpp index c261ecb..92f1560 100644 --- a/src/fenix_process_recovery.cpp +++ b/src/fenix_process_recovery.cpp @@ -55,6 +55,12 @@ */ #include +#include + +#include +#ifndef MPICH_VERSION +#include +#endif #include "fenix_ext.hpp" #include "fenix_process_recovery.hpp" @@ -62,10 +68,6 @@ #include "fenix_data_recovery.hpp" #include "fenix_opt.hpp" #include "fenix_util.hpp" -#include -#include - -#include using namespace Fenix; using namespace Fenix::Data; @@ -180,7 +182,11 @@ int fenix_preinit(const Args::FenixInitArgs& args, jmp_buf* jump_env){ __fenix_get_current_rank(*fenix.world), fenix.role); } } else { +#ifdef MPICH_VERSION + MPIX_Comm_failure_ack(*fenix.world); +#else MPIX_Comm_ack_failed(*fenix.world, __fenix_get_world_size(*fenix.world), &a); +#endif } fenix.role = FENIX_ROLE_RECOVERED_RANK; } @@ -651,7 +657,7 @@ void __fenix_postinit() if(fenix.new_world_exists){ //Set up dummy irecv to use for checking for failures. 
MPI_Irecv(&fenix.dummy_recv_buffer, 1, MPI_INT, MPI_ANY_SOURCE, - 34095347, fenix.new_world, &fenix.check_failures_req); + 1234, fenix.new_world, &fenix.check_failures_req); } if(fenix.role != FENIX_ROLE_INITIAL_RANK) { @@ -704,7 +710,9 @@ void __fenix_finalize() for (int i = first_spare_rank; i <= last_spare_rank; i++) { //We don't care if a spare failed, ignore return value int unused; - MPI_Send(&unused, 1, MPI_INT, i, 1, *fenix.world); + MPI_Request req; + MPI_Isend(&unused, 1, MPI_INT, i, 1, *fenix.world, &req); + MPI_Request_free(&req); } } @@ -749,8 +757,11 @@ void __fenix_finalize() void __fenix_finalize_spare() { fenix.fenix_init_flag = false; - int unused; + +#ifdef MPICH_VERSION + MPIX_Comm_agree(*fenix.world, &unused); +#else MPI_Request agree_req, recv_req = MPI_REQUEST_NULL; MPIX_Comm_iagree(*fenix.world, &unused, &agree_req); @@ -770,6 +781,7 @@ void __fenix_finalize_spare() } if(recv_req != MPI_REQUEST_NULL) MPI_Cancel(&recv_req); +#endif MPI_Op_free(&fenix.agree_op); MPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_ARE_FATAL); diff --git a/test/exception_throw/CMakeLists.txt b/test/exception_throw/CMakeLists.txt index 7cd5a58..2f2de3a 100644 --- a/test/exception_throw/CMakeLists.txt +++ b/test/exception_throw/CMakeLists.txt @@ -9,6 +9,7 @@ # add_executable(fenix_exceptions fenix_exceptions.cpp) +target_compile_features(fenix_exceptions PRIVATE cxx_std_17) target_link_libraries(fenix_exceptions fenix MPI::MPI_CXX) add_test(NAME exception_throw diff --git a/test/failed_spares/CMakeLists.txt b/test/failed_spares/CMakeLists.txt index 46e4f4c..b1af7c5 100644 --- a/test/failed_spares/CMakeLists.txt +++ b/test/failed_spares/CMakeLists.txt @@ -8,8 +8,10 @@ # directory. 
# +find_package(Threads REQUIRED) + add_executable(fenix_failed_spares fenix_failed_spares.c) -target_link_libraries(fenix_failed_spares fenix MPI::MPI_CXX) +target_link_libraries(fenix_failed_spares fenix MPI::MPI_CXX Threads::Threads) add_test(NAME failed_spares COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_failed_spares ${MPIEXEC_POSTFLAGS} 3 1 3 4 ) diff --git a/test/request_tracking/fenix_request_tracking_test.c b/test/request_tracking/fenix_request_tracking_test.c index ae0dc11..d925a3f 100644 --- a/test/request_tracking/fenix_request_tracking_test.c +++ b/test/request_tracking/fenix_request_tracking_test.c @@ -39,6 +39,7 @@ int main(int argc, char **argv) int *bufs_recv = (int *)malloc(isends*sizeof(int)); MPI_Request *reqs = (MPI_Request *)malloc(isends*sizeof(MPI_Request)); MPI_Request *reqs_recv = (MPI_Request *)malloc(isends*sizeof(MPI_Request)); + MPI_Status *statuses = (MPI_Status *)malloc(isends*sizeof(MPI_Status)); MPI_Barrier(newcomm); MPI_Barrier(newcomm); MPI_Barrier(newcomm); @@ -59,8 +60,8 @@ int main(int argc, char **argv) } for(i=0 ; i