diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index a522121c447..28422e4b553 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -428,7 +428,6 @@ int ompi_mpi_finalize(void) } /* Leave the RTE */ - if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { return ret; } @@ -440,6 +439,9 @@ int ompi_mpi_finalize(void) return ret; } + /* cleanup the progress engine */ + opal_progress_finalize(); + if (OPAL_SUCCESS != (ret = opal_finalize_util())) { return ret; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index df98922637f..4af70709034 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -17,7 +17,7 @@ * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * @@ -483,6 +483,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so */ OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); + /* + * Initialize the general progress engine + */ + if (OPAL_SUCCESS != (ret = opal_progress_init())) { + error = "opal_progress_init"; + goto error; + } + /* we want to tick the event library whenever possible */ + opal_progress_event_users_increment(); + #if OPAL_HAVE_HWLOC /* if hwloc is available but didn't get setup for some * reason, do so now diff --git a/opal/Makefile.am b/opal/Makefile.am index a7c9d0bc5d8..127ac4fa598 100644 --- a/opal/Makefile.am +++ b/opal/Makefile.am @@ -75,6 +75,7 @@ nobase_opal_HEADERS = $(headers) endif include class/Makefile.am +include errhandler/Makefile.am include memoryhooks/Makefile.am include runtime/Makefile.am include threads/Makefile.am diff --git a/opal/class/opal_hotel.c b/opal/class/opal_hotel.c index 209e66eb9e2..8cd2aa232eb 100644 --- a/opal/class/opal_hotel.c +++ b/opal/class/opal_hotel.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +34,7 @@ static void local_eviction_callback(int fd, short flags, void *arg) int opal_hotel_init(opal_hotel_t *h, int num_rooms, + opal_event_base_t *evbase, uint32_t eviction_timeout, int eviction_event_priority, opal_hotel_eviction_callback_fn_t evict_callback_fn) @@ -46,6 +48,7 @@ int opal_hotel_init(opal_hotel_t *h, int num_rooms, } h->num_rooms = num_rooms; + h->evbase = evbase; h->eviction_timeout.tv_usec = eviction_timeout % 1000000; h->eviction_timeout.tv_sec = eviction_timeout / 1000000; h->evict_callback_fn = evict_callback_fn; @@ -69,7 +72,7 @@ int opal_hotel_init(opal_hotel_t *h, int num_rooms, h->eviction_args[i].room_num = i; /* Create this room's event (but don't add it) */ - opal_event_set(opal_event_base, + opal_event_set(h->evbase, &(h->rooms[i].eviction_timer_event), -1, 0, local_eviction_callback, &(h->eviction_args[i])); diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index f8ecd4c0cb5..7850cf72971 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -111,6 +111,8 @@ typedef struct opal_hotel_t { /* Max number of rooms in the hotel */ int num_rooms; + /* event base for this hotel */ + opal_event_base_t *evbase; struct timeval eviction_timeout; opal_hotel_eviction_callback_fn_t evict_callback_fn; @@ -133,6 +135,7 @@ OBJ_CLASS_DECLARATION(opal_hotel_t); * * @param hotel Pointer to a hotel (IN) * @param num_rooms The total number of rooms in the hotel (IN) + * @param evbase Pointer to the event base for timer events * @param eviction_timeout Max length of a stay at the hotel before * the eviction callback is invoked (in microseconds) * @param eviction_event_priority Event lib priority for the eviction timeout @@ -147,6 +150,7 @@ OBJ_CLASS_DECLARATION(opal_hotel_t); * the error indicate what went wrong in the function. */ OPAL_DECLSPEC int opal_hotel_init(opal_hotel_t *hotel, int num_rooms, + opal_event_base_t *evbase, uint32_t eviction_timeout, int eviction_event_priority, opal_hotel_eviction_callback_fn_t evict_callback_fn); @@ -199,8 +203,8 @@ static inline int opal_hotel_checkin(opal_hotel_t *hotel, * caller *knows* that there is a room available. */ static inline void opal_hotel_checkin_with_res(opal_hotel_t *hotel, - void *occupant, - int *room_num) + void *occupant, + int *room_num) { opal_hotel_room_t *room; diff --git a/opal/errhandler/Makefile.am b/opal/errhandler/Makefile.am new file mode 100644 index 00000000000..b6e3eab4d5a --- /dev/null +++ b/opal/errhandler/Makefile.am @@ -0,0 +1,17 @@ +# -*- makefile -*- +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/Makefile.am + +headers += \ + errhandler/opal_errhandler.h + +lib@OPAL_LIB_PREFIX@open_pal_la_SOURCES += \ + errhandler/opal_errhandler.c diff --git a/opal/errhandler/opal_errhandler.c b/opal/errhandler/opal_errhandler.c new file mode 100644 index 00000000000..fdd00d67674 --- /dev/null +++ b/opal/errhandler/opal_errhandler.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/errhandler/opal_errhandler.h" + +opal_errhandler_fn_t errhandler = NULL; +void *cbdata = NULL; + +void opal_register_errhandler(opal_errhandler_fn_t newerr, void *cbd) +{ + errhandler = newerr; + cbdata = cbd; +} + +void opal_deregister_errhandler(void) +{ + errhandler = NULL; + cbdata = NULL; +} + +void opal_invoke_errhandler(int status, opal_proc_t *proc) +{ + if (NULL != errhandler) { + errhandler(status, proc, cbdata); + } +} diff --git a/opal/errhandler/opal_errhandler.h b/opal/errhandler/opal_errhandler.h new file mode 100644 index 00000000000..4a1646f52b8 --- /dev/null +++ b/opal/errhandler/opal_errhandler.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2015 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_ERRHANDLER_H +#define OPAL_ERRHANDLER_H + +#include "opal_config.h" + +#include "opal/util/proc.h" + +typedef void (*opal_errhandler_fn_t)(int status, opal_proc_t *proc, void *cbdata); + +OPAL_DECLSPEC void opal_register_errhandler(opal_errhandler_fn_t errhandler, void *cbdata); + +OPAL_DECLSPEC void opal_deregister_errhandler(void); + +OPAL_DECLSPEC void opal_invoke_errhandler(int status, opal_proc_t *proc); + +#endif diff --git a/opal/mca/btl/openib/btl_openib_fd.c b/opal/mca/btl/openib/btl_openib_fd.c index c2a82d9e367..c7833c3c7f9 100644 --- a/opal/mca/btl/openib/btl_openib_fd.c +++ b/opal/mca/btl/openib/btl_openib_fd.c @@ -3,6 +3,7 @@ * Copyright (c) 2009 Sandia National Laboratories. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -175,7 +176,7 @@ static int service_pipe_cmd_add_fd(bool use_libevent, cmd_t *cmd) if (use_libevent) { /* Make an event for this fd */ ri->ri_event_used = true; - opal_event_set(opal_event_base, &ri->ri_event, ri->ri_fd, + opal_event_set(opal_sync_event_base, &ri->ri_event, ri->ri_fd, ri->ri_flags | OPAL_EV_PERSIST, service_fd_callback, ri); opal_event_add(&ri->ri_event, 0); @@ -501,7 +502,7 @@ int opal_btl_openib_fd_init(void) /* Create a libevent event that is used in the main thread to watch its pipe */ - opal_event_set(opal_event_base, &main_thread_event, pipe_to_main_thread[0], + opal_event_set(opal_sync_event_base, &main_thread_event, pipe_to_main_thread[0], OPAL_EV_READ | OPAL_EV_PERSIST, main_thread_event_callback, NULL); opal_event_add(&main_thread_event, 0); diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index d3a68bed2ef..e9dfb7584f1 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -7,7 +7,7 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * * $COPYRIGHT$ @@ -2212,7 +2212,7 @@ static void udcm_sent_message_constructor (udcm_message_sent_t *message) { memset ((char *)message + sizeof (message->super), 0, sizeof (*message) - sizeof (message->super)); - opal_event_evtimer_set(opal_event_base, &message->event, udcm_send_timeout, message); + opal_event_evtimer_set(opal_sync_event_base, &message->event, udcm_send_timeout, message); } static void udcm_sent_message_destructor (udcm_message_sent_t *message) diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index beb2f95a2f5..855fa1206da 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -16,7 +16,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -818,7 +818,7 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) /* register listen port */ #if OPAL_ENABLE_IPV6 if (AF_INET6 == af_family) { - opal_event_set(opal_event_base, &mca_btl_tcp_component.tcp6_recv_event, + opal_event_set(opal_sync_event_base, &mca_btl_tcp_component.tcp6_recv_event, mca_btl_tcp_component.tcp6_listen_sd, OPAL_EV_READ|OPAL_EV_PERSIST, mca_btl_tcp_component_accept_handler, @@ -827,7 +827,7 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family) } else #endif { - opal_event_set(opal_event_base, &mca_btl_tcp_component.tcp_recv_event, + opal_event_set(opal_sync_event_base, &mca_btl_tcp_component.tcp_recv_event, mca_btl_tcp_component.tcp_listen_sd, OPAL_EV_READ|OPAL_EV_PERSIST, mca_btl_tcp_component_accept_handler, @@ -1049,7 +1049,7 @@ static void mca_btl_tcp_component_accept_handler( int incoming_sd, /* wait for receipt of peers process identifier to complete this connection */ event = OBJ_NEW(mca_btl_tcp_event_t); - opal_event_set(opal_event_base, &event->event, sd, OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event); + opal_event_set(opal_sync_event_base, &event->event, sd, OPAL_EV_READ, mca_btl_tcp_component_recv_handler, event); opal_event_add(&event->event, 0); } } diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index a6a284c3c1b..993764d95d3 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -309,7 +309,7 @@ static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_ btl_endpoint->endpoint_cache_pos = btl_endpoint->endpoint_cache; #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_recv_event, + opal_event_set(opal_sync_event_base, &btl_endpoint->endpoint_recv_event, btl_endpoint->endpoint_sd, OPAL_EV_READ|OPAL_EV_PERSIST, mca_btl_tcp_endpoint_recv_handler, @@ -320,7 +320,7 @@ static inline void mca_btl_tcp_endpoint_event_init(mca_btl_base_endpoint_t* btl_ * will be fired only once, and when the endpoint is marked as * CONNECTED the event should be recreated with the correct flags. */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, + opal_event_set(opal_sync_event_base, &btl_endpoint->endpoint_send_event, btl_endpoint->endpoint_sd, OPAL_EV_WRITE, mca_btl_tcp_endpoint_send_handler, @@ -509,7 +509,7 @@ void mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint, assert(btl_endpoint->endpoint_sd_next == -1); btl_endpoint->endpoint_sd_next = sd; - opal_event_evtimer_set(opal_event_base, &btl_endpoint->endpoint_accept_event, + opal_event_evtimer_set(opal_sync_event_base, &btl_endpoint->endpoint_accept_event, mca_btl_tcp_endpoint_complete_accept, btl_endpoint); opal_event_add(&btl_endpoint->endpoint_accept_event, &now); } @@ -570,7 +570,7 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "READY [endpoint_connected]"); /* Create the send event in a persistent manner. */ - opal_event_set(opal_event_base, &btl_endpoint->endpoint_send_event, + opal_event_set(opal_sync_event_base, &btl_endpoint->endpoint_send_event, btl_endpoint->endpoint_sd, OPAL_EV_WRITE | OPAL_EV_PERSIST, mca_btl_tcp_endpoint_send_handler, diff --git a/opal/mca/btl/usnic/btl_usnic_component.c b/opal/mca/btl/usnic/btl_usnic_component.c index c2bd218e2b1..cdd9d8bc013 100644 --- a/opal/mca/btl/usnic/btl_usnic_component.c +++ b/opal/mca/btl/usnic/btl_usnic_component.c @@ -15,7 +15,7 @@ * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -972,7 +972,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules, } /* start timer to guarantee synthetic clock advances */ - opal_event_set(opal_event_base, &usnic_clock_timer_event, + opal_event_set(opal_sync_event_base, &usnic_clock_timer_event, -1, 0, usnic_clock_callback, &usnic_clock_timeout); usnic_clock_timer_event_set = true; diff --git a/opal/mca/btl/usnic/btl_usnic_endpoint.c b/opal/mca/btl/usnic/btl_usnic_endpoint.c index 998e4576c3f..79baea949c5 100644 --- a/opal/mca/btl/usnic/btl_usnic_endpoint.c +++ b/opal/mca/btl/usnic/btl_usnic_endpoint.c @@ -86,6 +86,7 @@ static void endpoint_construct(mca_btl_base_endpoint_t* endpoint) OBJ_CONSTRUCT(&endpoint->endpoint_hotel, opal_hotel_t); opal_hotel_init(&endpoint->endpoint_hotel, WINDOW_SIZE, + opal_sync_event_base, mca_btl_usnic_component.retrans_timeout, 0, opal_btl_usnic_ack_timeout); diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 2b36af126bd..907c3f6bd8b 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -15,7 +15,7 @@ * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -2068,7 +2068,7 @@ static void init_async_event(opal_btl_usnic_module_t *module) } /* Get the fd to receive events on this device */ - opal_event_set(opal_event_base, &(module->device_async_event), fd, + opal_event_set(opal_sync_event_base, &(module->device_async_event), fd, OPAL_EV_READ | OPAL_EV_PERSIST, module_async_event_callback, module); opal_event_add(&(module->device_async_event), NULL); diff --git a/opal/mca/btl/usnic/btl_usnic_stats.c b/opal/mca/btl/usnic/btl_usnic_stats.c index 6e9051284ef..35c495810af 100644 --- a/opal/mca/btl/usnic/btl_usnic_stats.c +++ b/opal/mca/btl/usnic/btl_usnic_stats.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -212,7 +213,7 @@ int opal_btl_usnic_stats_init(opal_btl_usnic_module_t *module) module->stats.timeout.tv_sec = mca_btl_usnic_component.stats_frequency; module->stats.timeout.tv_usec = 0; - opal_event_set(opal_event_base, &(module->stats.timer_event), + opal_event_set(opal_sync_event_base, &(module->stats.timer_event), -1, EV_TIMEOUT | EV_PERSIST, &usnic_stats_callback, module); opal_event_add(&(module->stats.timer_event), diff --git a/opal/mca/event/base/event_base_frame.c b/opal/mca/event/base/event_base_frame.c index c1713b1d095..6f076898ecf 100644 --- a/opal/mca/event/base/event_base_frame.c +++ b/opal/mca/event/base/event_base_frame.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +15,7 @@ #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/mca/event/event.h" #include "opal/mca/event/base/base.h" @@ -27,6 +28,13 @@ */ #include "opal/mca/event/base/static-components.h" +/* + * Globals + */ +opal_event_base_t *opal_async_event_base=NULL; +opal_event_base_t *opal_sync_event_base=NULL; + + /** * Initialize the event MCA framework * @@ -56,16 +64,14 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, event, NULL, NULL, opal_event_base_open, static int opal_event_base_close(void) { + /* release the synchronous event base */ + // opal_event_base_free(opal_sync_event_base); + /* cleanup components even though they are statically opened */ return mca_base_framework_components_close (&opal_event_base_framework, NULL); } -/* - * Globals - */ -opal_event_base_t *opal_event_base=NULL; - static int opal_event_base_open(mca_base_open_flag_t flags) { int rc = OPAL_SUCCESS; @@ -83,15 +89,12 @@ static int opal_event_base_open(mca_base_open_flag_t flags) /* Declare our intent to use threads */ opal_event_use_threads(); - /* get our event base */ - if (NULL == (opal_event_base = opal_event_base_create())) { + /* setup the synchronized event base - this is + * the event base that will be progressed + * via the main process thread */ + if (NULL == (opal_sync_event_base = opal_event_base_create())) { return OPAL_ERROR; } - - /* set the number of priorities */ - if (0 < OPAL_EVENT_NUM_PRI) { - opal_event_base_priority_init(opal_event_base, OPAL_EVENT_NUM_PRI); - } - - return rc; + + return OPAL_SUCCESS; } diff --git a/opal/mca/event/event.h b/opal/mca/event/event.h index cc81bb95068..600fa21348f 100644 --- a/opal/mca/event/event.h +++ b/opal/mca/event/event.h @@ -36,13 +36,15 @@ BEGIN_C_DECLS #define OPAL_EVENT_NUM_PRI 8 #define OPAL_EV_ERROR_PRI 0 -#define OPAL_EV_MSG_HI_PRI 1 -#define OPAL_EV_SYS_HI_PRI 2 -#define OPAL_EV_MSG_LO_PRI 3 -#define OPAL_EV_SYS_LO_PRI 4 -#define OPAL_EV_INFO_HI_PRI 5 -#define OPAL_EV_INFO_LO_PRI 6 -#define OPAL_EV_LOWEST_PRI 7 + +#define OPAL_EV_BTL_RED_PRI 1 +#define OPAL_EV_BTL_YEL_PRI 2 +#define OPAL_EV_BTL_GRN_PRI 3 +#define OPAL_EV_BTL_BLU_PRI 4 + +#define OPAL_EV_RTE_HI_PRI 5 +#define OPAL_EV_RTE_MED_PRI 6 +#define OPAL_EV_RTE_LO_PRI 7 #define OPAL_EVENT_SIGNAL(ev) opal_event_get_signal(ev) diff --git a/opal/mca/event/external/external.h b/opal/mca/event/external/external.h index 7ddb54645b1..ca05fc93b26 100644 --- a/opal/mca/event/external/external.h +++ b/opal/mca/event/external/external.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -27,7 +28,8 @@ BEGIN_C_DECLS typedef struct event_base opal_event_base_t; typedef struct event opal_event_t; -OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_async_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base; #define OPAL_EV_TIMEOUT EV_TIMEOUT #define OPAL_EV_READ EV_READ diff --git a/opal/mca/event/libevent2022/libevent2022.h b/opal/mca/event/libevent2022/libevent2022.h index 7c6706323e3..ea6a369c001 100644 --- a/opal/mca/event/libevent2022/libevent2022.h +++ b/opal/mca/event/libevent2022/libevent2022.h @@ -67,7 +67,8 @@ BEGIN_C_DECLS typedef struct event_base opal_event_base_t; typedef struct event opal_event_t; -OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_async_event_base; +OPAL_DECLSPEC extern opal_event_base_t *opal_sync_event_base; #define OPAL_EV_TIMEOUT EV_TIMEOUT #define OPAL_EV_READ EV_READ @@ -82,7 +83,12 @@ OPAL_DECLSPEC extern opal_event_base_t *opal_event_base; /* Global function to create and release an event base */ OPAL_DECLSPEC opal_event_base_t* opal_event_base_create(void); -#define opal_event_base_free(x) event_base_free(x) +#define opal_event_base_free(x) \ + do { \ + OPAL_OUTPUT_VERBOSE((5, opal_event_base_framework.framework_output, \ + "event_base_free[%s:%d]", __FILE__, __LINE__)); \ + event_base_free(x); \ + } while(0); OPAL_DECLSPEC int opal_event_init(void); @@ -107,9 +113,19 @@ OPAL_DECLSPEC int opal_event_init(void); #define opal_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) -#define opal_event_add(ev, tv) event_add((ev), (tv)) - -#define opal_event_del(ev) event_del((ev)) +#define opal_event_add(ev, tv) \ + do { \ + OPAL_OUTPUT_VERBOSE((5, opal_event_base_framework.framework_output, \ + "event_add[%s:%d]", __FILE__, __LINE__)); \ + event_add((ev), (tv)); \ + } while(0); + +#define opal_event_del(ev) \ + do { \ + OPAL_OUTPUT_VERBOSE((5, opal_event_base_framework.framework_output, \ + "event_del[%s:%d]", __FILE__, __LINE__)); \ + event_del((ev)); \ + } while(0); #define opal_event_active(x, y, z) event_active((x), (y), (z)) diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 3df545abd0b..3c613e8aada 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -33,7 +33,7 @@ OPAL_DECLSPEC extern bool opal_pmix_base_allow_delayed_server; OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err); OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void); -OPAL_DECLSPEC void opal_pmix_base_errhandler(int error); +OPAL_DECLSPEC void opal_pmix_base_errhandler(int error, opal_proc_t *proc); END_C_DECLS diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index c5859652929..67dfced1fc0 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -41,10 +41,10 @@ void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err) errhandler = err; } -void opal_pmix_base_errhandler(int error) +void opal_pmix_base_errhandler(int error, opal_proc_t *proc) { if (NULL != errhandler) { - errhandler(error); + errhandler(error, proc); } } diff --git a/opal/mca/pmix/native/pmix_native.c b/opal/mca/pmix/native/pmix_native.c index cd5e704918c..58a0d2d6f62 100644 --- a/opal/mca/pmix/native/pmix_native.c +++ b/opal/mca/pmix/native/pmix_native.c @@ -24,6 +24,7 @@ #endif #include "opal/dss/dss.h" +#include "opal/errhandler/opal_errhandler.h" #include "opal/mca/event/event.h" #include "opal/mca/hwloc/base/base.h" #include "opal/runtime/opal.h" @@ -189,13 +190,11 @@ static int native_init(void) sizeof(mca_pmix_native_component.address.sun_path)-1, "%s", uri[1]); opal_argv_free(uri); - - /* create an event base and progress thread for us */ - if (NULL == (mca_pmix_native_component.evbase = opal_start_progress_thread("pmix_native", true))) { - return OPAL_ERROR; - } } + /* register our errhandler to point to opal_errhandler */ + opal_pmix_base_register_handler(opal_invoke_errhandler); + /* we will connect on first send */ return OPAL_SUCCESS; @@ -252,11 +251,6 @@ static int native_fini(void) OBJ_RELEASE(cb); } - if (NULL != mca_pmix_native_component.evbase) { - opal_stop_progress_thread("pmix_native", true); - mca_pmix_native_component.evbase = NULL; - } - if (0 <= mca_pmix_native_component.sd) { CLOSE_THE_SOCKET(mca_pmix_native_component.sd); } @@ -327,8 +321,7 @@ static int native_abort(int flag, const char msg[]) /* push a timeout event to wake us up just in case this * message cannot get thru - e.g., someone else may have * detected the failure of the server and ordered an abort */ - opal_event_evtimer_set(mca_pmix_native_component.evbase, - &ev, timeout, cb); + opal_event_evtimer_set(opal_async_event_base, &ev, timeout, cb); opal_event_evtimer_add(&ev, &tv); /* push the message into our event base to send to the server */ diff --git a/opal/mca/pmix/native/pmix_native.h b/opal/mca/pmix/native/pmix_native.h index 082e046e3e6..477f58d8147 100644 --- a/opal/mca/pmix/native/pmix_native.h +++ b/opal/mca/pmix/native/pmix_native.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,8 +44,10 @@ typedef enum { /* define a macro for abnormal termination */ #define PMIX_NATIVE_ABNORMAL_TERM \ do { \ + opal_proc_t *me; \ + me = opal_proc_local_get(); \ mca_pmix_native_component.state = PMIX_USOCK_FAILED; \ - opal_pmix_base_errhandler(OPAL_ERR_COMM_FAILURE); \ + opal_pmix_base_errhandler(OPAL_ERR_COMM_FAILURE, me); \ } while(0); /* define a command type for communicating to the @@ -140,7 +142,6 @@ typedef struct { opal_buffer_t *cache_local; opal_buffer_t *cache_remote; opal_buffer_t *cache_global; - opal_event_base_t *evbase; opal_process_name_t id; opal_process_name_t server; char *uri; @@ -190,9 +191,9 @@ OPAL_MODULE_DECLSPEC int usock_send_connect_ack(void); ms->bfr = (b); \ ms->cbfunc = (cb); \ ms->cbdata = (d); \ - opal_event_set(mca_pmix_native_component.evbase, &((ms)->ev), -1, \ - OPAL_EV_WRITE, pmix_usock_send_recv, (ms)); \ - opal_event_set_priority(&((ms)->ev), OPAL_EV_MSG_LO_PRI); \ + opal_event_set(opal_async_event_base, &((ms)->ev), -1, \ + OPAL_EV_WRITE, pmix_usock_send_recv, (ms)); \ + opal_event_set_priority(&((ms)->ev), OPAL_EV_RTE_HI_PRI); \ opal_event_active(&((ms)->ev), OPAL_EV_WRITE, 1); \ } while(0); @@ -202,10 +203,10 @@ OPAL_MODULE_DECLSPEC int usock_send_connect_ack(void); "%s [%s:%d] post msg", \ OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ __FILE__, __LINE__); \ - opal_event_set(mca_pmix_native_component.evbase, &ms->ev, -1, \ + opal_event_set(opal_async_event_base, &ms->ev, -1, \ OPAL_EV_WRITE, \ pmix_usock_process_msg, ms); \ - opal_event_set_priority(&ms->ev, OPAL_EV_MSG_LO_PRI); \ + opal_event_set_priority(&ms->ev, OPAL_EV_RTE_HI_PRI); \ opal_event_active(&ms->ev, OPAL_EV_WRITE, 1); \ } while(0); diff --git a/opal/mca/pmix/native/pmix_native_component.c b/opal/mca/pmix/native/pmix_native_component.c index 8b10adf897a..f3b3ce6d83a 100644 --- a/opal/mca/pmix/native/pmix_native_component.c +++ b/opal/mca/pmix/native/pmix_native_component.c @@ -82,7 +82,6 @@ static int pmix_native_open(void) mca_pmix_native_component.cache_local = NULL; mca_pmix_native_component.cache_remote = NULL; mca_pmix_native_component.cache_global = NULL; - mca_pmix_native_component.evbase = NULL; mca_pmix_native_component.id = opal_name_invalid; mca_pmix_native_component.server = opal_name_invalid; mca_pmix_native_component.uri = NULL; diff --git a/opal/mca/pmix/native/usock.c b/opal/mca/pmix/native/usock.c index 6de7eacf357..aa164abaa3f 100644 --- a/opal/mca/pmix/native/usock.c +++ b/opal/mca/pmix/native/usock.c @@ -68,14 +68,14 @@ static OBJ_CLASS_INSTANCE(pmix_usock_op_t, opal_object_t, NULL, NULL); -#define PMIX_ACTIVATE_USOCK_STATE(cbfunc) \ - do { \ - pmix_usock_op_t *op; \ - op = OBJ_NEW(pmix_usock_op_t); \ - opal_event_set(mca_pmix_native_component.evbase, &op->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), op); \ - opal_event_set_priority(&op->ev, OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \ +#define PMIX_ACTIVATE_USOCK_STATE(cbfunc) \ + do { \ + pmix_usock_op_t *op; \ + op = OBJ_NEW(pmix_usock_op_t); \ + opal_event_set(opal_async_event_base, &op->ev, -1, \ + OPAL_EV_WRITE, (cbfunc), op); \ + opal_event_set_priority(&op->ev, OPAL_EV_RTE_HI_PRI); \ + opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \ } while(0); void pmix_usock_send_recv(int fd, short args, void *cbdata) @@ -275,20 +275,20 @@ static void pmix_usock_try_connect(int fd, short args, void *cbdata) OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* setup event callbacks */ - opal_event_set(mca_pmix_native_component.evbase, + opal_event_set(opal_async_event_base, &mca_pmix_native_component.recv_event, mca_pmix_native_component.sd, OPAL_EV_READ|OPAL_EV_PERSIST, pmix_usock_recv_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.recv_event, OPAL_EV_MSG_LO_PRI); + opal_event_set_priority(&mca_pmix_native_component.recv_event, OPAL_EV_RTE_HI_PRI); mca_pmix_native_component.recv_ev_active = false; - opal_event_set(mca_pmix_native_component.evbase, + opal_event_set(opal_async_event_base, &mca_pmix_native_component.send_event, mca_pmix_native_component.sd, OPAL_EV_WRITE|OPAL_EV_PERSIST, pmix_usock_send_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.send_event, OPAL_EV_MSG_LO_PRI); + opal_event_set_priority(&mca_pmix_native_component.send_event, OPAL_EV_RTE_HI_PRI); mca_pmix_native_component.send_ev_active = false; /* setup the socket as non-blocking */ diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index e122cf365b7..6cd594edd9a 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -280,11 +280,11 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * that takes into account directives and availability of * non-blocking operations */ -#define OPAL_FENCE(p, s, cf, cd) \ +#define OPAL_FENCE(p, s, cf, cd) \ opal_pmix.fence((p), (s)); /* callback handler for errors */ -typedef void (*opal_pmix_errhandler_fn_t)(int error); +typedef void (*opal_pmix_errhandler_fn_t)(int error, opal_proc_t *proc); /**** DEFINE THE PUBLIC API'S **** **** NOTE THAT WE DO NOT HAVE A 1:1 MAPPING OF APIs **** diff --git a/opal/mca/sec/basic/sec_basic.c b/opal/mca/sec/basic/sec_basic.c index a896ad2653b..fdc07131150 100644 --- a/opal/mca/sec/basic/sec_basic.c +++ b/opal/mca/sec/basic/sec_basic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +37,7 @@ opal_sec_base_module_t opal_sec_basic_module = { authenticate }; -static opal_sec_cred_t my_cred; +static opal_sec_cred_t my_cred = {NULL, NULL, 0}; static bool initialized = false; static int init(void) @@ -47,9 +47,16 @@ static int init(void) static void finalize(void) { - if (initialized) { + if (NULL != my_cred.credential) { free(my_cred.credential); + my_cred.credential = NULL; } + if (NULL != my_cred.method) { + free(my_cred.method); + my_cred.method = NULL; + } + my_cred.size = 0; + initialized = false; } static int get_my_cred(int dstorehandle, diff --git a/opal/mca/sec/munge/sec_munge.c b/opal/mca/sec/munge/sec_munge.c index 7ebfa0bc1e5..b895392891e 100644 --- a/opal/mca/sec/munge/sec_munge.c +++ b/opal/mca/sec/munge/sec_munge.c @@ -40,7 +40,7 @@ opal_sec_base_module_t opal_sec_munge_module = { authenticate }; -static opal_sec_cred_t my_cred; +static opal_sec_cred_t my_cred = {NULL, NULL, 0}; static bool initialized = false; static bool refresh = false; @@ -70,9 +70,15 @@ static int init(void) static void finalize(void) { - if (initialized) { + if (NULL != my_cred.credential) { free(my_cred.credential); + my_cred.credential = NULL; + } + if (NULL != my_cred.method) { + free(my_cred.method); + my_cred.method = NULL; } + my_cred.size = 0; } static int get_my_cred(int dstorehandle, diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index 5fc619d6074..ccf1613a57d 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -15,6 +15,7 @@ * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -811,7 +812,7 @@ int opal_cr_coord(int state) * Otherwise it may/will use stale file descriptors which will disrupt * the intended users of the soon-to-be newly assigned file descriptors. */ - opal_event_reinit(opal_event_base); + opal_event_reinit(opal_sync_event_base); /* * Flush if() functionality, since it caches system specific info. diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 382deb3c324..25e0b1ac20d 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -47,6 +47,7 @@ #include "opal/mca/hwloc/base/base.h" #include "opal/mca/event/base/base.h" #include "opal/runtime/opal_progress.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/mca/shmem/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" @@ -133,9 +134,7 @@ opal_finalize(void) } return OPAL_SUCCESS; } - - opal_progress_finalize(); - + /* close the checkpoint and restart service */ opal_cr_finalize(); @@ -173,9 +172,6 @@ opal_finalize(void) /* close the shmem framework */ (void) mca_base_framework_close(&opal_shmem_base_framework); - /* close the sec framework */ - (void) mca_base_framework_close(&opal_sec_base_framework); - /* finalize util code */ opal_finalize_util(); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index cea62629119..ad9ba464ae6 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -470,16 +470,6 @@ opal_init(int* pargc, char*** pargv) goto return_error; } - /* - * Initialize the general progress engine - */ - if (OPAL_SUCCESS != (ret = opal_progress_init())) { - error = "opal_progress_init"; - goto return_error; - } - /* we want to tick the event library whenever possible */ - opal_progress_event_users_increment(); - /* setup the shmem framework */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_shmem_base_framework, 0))) { error = "opal_shmem_base_open"; diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index d7ecf086d73..d2563f4f704 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -14,6 +14,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -168,7 +169,7 @@ opal_progress(void) event_progress_last_time = (num_event_users > 0) ? now - event_progress_delta : now; - events += opal_event_loop(opal_event_base, opal_progress_event_flag); + events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); } #else /* OPAL_PROGRESS_USE_TIMERS */ @@ -177,7 +178,7 @@ opal_progress(void) if (OPAL_THREAD_ADD32(&event_progress_counter, -1) <= 0 ) { event_progress_counter = (num_event_users > 0) ? 0 : event_progress_delta; - events += opal_event_loop(opal_event_base, opal_progress_event_flag); + events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag); } #endif /* OPAL_PROGRESS_USE_TIMERS */ diff --git a/opal/runtime/opal_progress_threads.c b/opal/runtime/opal_progress_threads.c index 2c24326045b..fc70388db9d 100644 --- a/opal/runtime/opal_progress_threads.c +++ b/opal/runtime/opal_progress_threads.c @@ -175,11 +175,13 @@ void opal_stop_progress_thread(char *name, bool cleanup) * case the thread is blocked in a call to select for * a long time */ if (trk->block_active) { + trk->block_active = false; i=1; write(trk->pipe[1], &i, sizeof(int)); } /* wait for thread to exit */ opal_thread_join(&trk->engine, NULL); + /* mark the block as inactive */ /* cleanup, if they indicated they are done with this event base */ if (cleanup) { opal_list_remove_item(&tracking, &trk->super); diff --git a/opal/runtime/opal_progress_threads.h b/opal/runtime/opal_progress_threads.h index ec1f8e2adf4..773f4e9b7fa 100644 --- a/opal/runtime/opal_progress_threads.h +++ b/opal/runtime/opal_progress_threads.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 8ee289e040d..c0e36ef86bb 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -9,6 +9,7 @@ * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,6 +27,7 @@ #include "opal/util/output.h" #include "opal/dss/dss.h" +#include "opal/errhandler/opal_errhandler.h" #include "opal/mca/pmix/pmix.h" #include "orte/util/error_strings.h" @@ -68,10 +70,10 @@ orte_errmgr_base_module_t orte_errmgr_default_app_module = { }; static void proc_errors(int fd, short args, void *cbdata); -static void pmix_error(int error) +static void opal_error(int error, opal_proc_t *proc, void *cbdata) { /* push it into our event base */ - ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, ORTE_PROC_STATE_COMM_FAILED); + ORTE_ACTIVATE_PROC_STATE(&proc->proc_name, error); } /************************ @@ -82,11 +84,9 @@ static int init(void) /* setup state machine to trap proc errors */ orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); - /* register an errhandler with the PMIx framework so - * we can know of loss of connection to the server */ - if (NULL != opal_pmix.register_errhandler) { - opal_pmix.register_errhandler(pmix_error); - } + /* register an errhandler with the OPAL layer so + * we can know of errors down there */ + opal_register_errhandler(opal_error, NULL); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index b6b4068e8d9..8c6e38dc1e0 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -75,8 +75,6 @@ #include "orte/mca/ess/base/base.h" -static bool progress_thread_running = false; - int orte_ess_base_app_setup(bool db_restrict_local) { int ret; @@ -114,9 +112,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) opal_proc_local_set(&orte_process_info.super); } - /* get a separate orte event base */ - orte_event_base = opal_start_progress_thread("orte", true); - progress_thread_running = true; /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -321,12 +316,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) } return ORTE_SUCCESS; error: - if (!progress_thread_running) { - /* can't send the help message, so ensure it - * comes out locally - */ - orte_show_help_finalize(); - } orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); @@ -337,13 +326,6 @@ int orte_ess_base_app_finalize(void) { orte_cr_finalize(); - /* release the event base so we stop all potential - * race conditions in the messaging teardown */ - if (progress_thread_running) { - opal_stop_progress_thread("orte", false); - progress_thread_running = false; - } - #if OPAL_ENABLE_FT_CR == 1 (void) mca_base_framework_close(&orte_snapc_base_framework); (void) mca_base_framework_close(&orte_sstore_base_framework); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 5914490974f..034a67ab1e1 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -838,6 +838,7 @@ static int rte_finalize(void) fclose(orte_xml_fp); } } + orte_wait_finalize(); return ORTE_SUCCESS; } diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 4dd72e6c623..6937806150e 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -181,7 +181,7 @@ typedef struct orte_iof_base_t orte_iof_base_t; *(rv) = rev; \ opal_event_set(orte_event_base, \ rev->ev, (fid), \ - OPAL_EV_READ, \ + OPAL_EV_READ | OPAL_EV_PERSIST, \ (cbfunc), rev); \ opal_event_set_priority(rev->ev, ORTE_MSG_PRI); \ if ((actv)) { \ diff --git a/orte/mca/iof/base/iof_base_frame.c b/orte/mca/iof/base/iof_base_frame.c index f0018b5cf4c..fa398dd016d 100644 --- a/orte/mca/iof/base/iof_base_frame.c +++ b/orte/mca/iof/base/iof_base_frame.c @@ -127,7 +127,10 @@ static void orte_iof_base_read_event_construct(orte_iof_read_event_t* rev) } static void orte_iof_base_read_event_destruct(orte_iof_read_event_t* rev) { - opal_event_free(rev->ev); + if (rev->active) { + opal_event_del(rev->ev); + } + opal_event_free(rev->ev); if (0 <= rev->fd) { OPAL_OUTPUT_VERBOSE((20, orte_iof_base_framework.framework_output, "%s iof: closing fd %d for process %s", @@ -151,6 +154,9 @@ static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev) } static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev) { + if (wev->pending) { + opal_event_del(wev->ev); + } opal_event_free(wev->ev); if (ORTE_PROC_IS_HNP && NULL != orte_xml_fp) { int xmlfd = fileno(orte_xml_fp); diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 26ed8450201..e5d22be4acf 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -44,7 +44,7 @@ #include "orte/mca/iof/base/base.h" int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream, - unsigned char *data, int numbytes, + unsigned char *data, int numbytes, orte_iof_write_event_t *channel) { char starttag[ORTE_IOF_BASE_TAG_MAX], endtag[ORTE_IOF_BASE_TAG_MAX], *suffix; @@ -285,8 +285,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) output = (orte_iof_write_output_t*)item; if (0 == output->numbytes) { /* indicates we are to close this stream */ - OBJ_RELEASE(sink); - return; + OBJ_RELEASE(output); + goto ABORT; } num_written = write(wev->fd, output->data, output->numbytes); if (num_written < 0) { @@ -332,5 +332,4 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) ABORT: opal_event_del(wev->ev); wev->pending = false; - } diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index bd47bb9f51e..4be0cb64581 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -458,6 +458,13 @@ static int finalize(void) orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP); + /* clear all the posted events */ + OPAL_LIST_DESTRUCT(&mca_iof_hnp_component.procs); + OPAL_LIST_DESTRUCT(&mca_iof_hnp_component.sinks); + + if (mca_iof_hnp_component.stdinev->active) { + opal_event_del(mca_iof_hnp_component.stdinev->ev); + } return ORTE_SUCCESS; } diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 9027d72ab60..6a54f4122a7 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -107,7 +107,6 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) /* non-blocking, retry */ if (EAGAIN == errno || EINTR == errno) { - opal_event_add(rev->ev, 0); return; } @@ -322,8 +321,5 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) } } - /* re-add the event */ - opal_event_add(rev->ev, 0); - return; } diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 5b8ed7c4938..526039d3d8d 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -148,7 +148,7 @@ static int tcp_component_open(void) mca_oob_tcp_component.ipv6conns = NULL; mca_oob_tcp_component.ipv6ports = NULL; #endif - + /* if_include and if_exclude need to be mutually exclusive */ if (OPAL_SUCCESS != mca_base_var_check_exclusive("orte", diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index b84ef4a7702..8916ba1c761 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -1292,8 +1292,7 @@ static int rsh_finalize(void) orte_proc_t *proc; pid_t ret; - /* remove launch event */ - opal_event_del(&launch_event); + /* cleanup launch list */ OPAL_LIST_DESTRUCT(&launch_list); /* cleanup any pending recvs */ diff --git a/orte/mca/qos/ack/qos_ack_component.c b/orte/mca/qos/ack/qos_ack_component.c index 9acb5c500e5..927a5fb25e8 100644 --- a/orte/mca/qos/ack/qos_ack_component.c +++ b/orte/mca/qos/ack/qos_ack_component.c @@ -188,8 +188,8 @@ static int ack_open (void *qos_channel, opal_buffer_t * buf) { eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; /* init outstanding msg hotel */ opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, - eviction_timeout, 0, - orte_qos_ack_msg_ack_timeout_callback); + orte_event_base, eviction_timeout, 0, + orte_qos_ack_msg_ack_timeout_callback); OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, "%s ack_open channel = %p init hotel timeout =%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -466,7 +466,7 @@ static int ack_init_recv (void *channel, opal_list_t *attributes) { eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; /* init outstanding msg hotel */ opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, - eviction_timeout, 0, + orte_event_base, eviction_timeout, 0, orte_qos_ack_recv_msg_timeout_callback); OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, "%s ack_open channel = %p init hotel timeout =%d", diff --git a/orte/mca/state/base/state_base_frame.c b/orte/mca/state/base/state_base_frame.c index 3838d901dd9..95776e00372 100644 --- a/orte/mca/state/base/state_base_frame.c +++ b/orte/mca/state/base/state_base_frame.c @@ -86,7 +86,6 @@ static void orte_state_caddy_construct(orte_state_caddy_t *caddy) } static void orte_state_caddy_destruct(orte_state_caddy_t *caddy) { - opal_event_del(&caddy->ev); if (NULL != caddy->jdata) { OBJ_RELEASE(caddy->jdata); } diff --git a/orte/runtime/orte_finalize.c b/orte/runtime/orte_finalize.c index 4a8b3291cb2..0f40002f263 100644 --- a/orte/runtime/orte_finalize.c +++ b/orte/runtime/orte_finalize.c @@ -26,6 +26,7 @@ #include "orte/constants.h" #include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress_threads.h" #include "opal/util/output.h" #include "orte/mca/ess/ess.h" @@ -82,7 +83,13 @@ int orte_finalize(void) /* Close the general debug stream */ opal_output_close(orte_debug_output); - /* finalize the opal utilities */ + /* shutdown the async progress thread, if we have one */ + if (ORTE_PROC_IS_APP) { + opal_stop_progress_thread("opal", true); + opal_event_base_free(opal_sync_event_base); + } + + /* finalize the opal utilities */ rc = opal_finalize(); return rc; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index c55aeb52534..e8dfcb17ace 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -125,9 +125,9 @@ ORTE_DECLSPEC extern int orte_exit_status; * that overrides the need to process MPI messages */ #define ORTE_ERROR_PRI OPAL_EV_ERROR_PRI -#define ORTE_MSG_PRI OPAL_EV_MSG_LO_PRI -#define ORTE_SYS_PRI OPAL_EV_SYS_LO_PRI -#define ORTE_INFO_PRI OPAL_EV_INFO_LO_PRI +#define ORTE_MSG_PRI OPAL_EV_RTE_HI_PRI +#define ORTE_SYS_PRI OPAL_EV_RTE_MED_PRI +#define ORTE_INFO_PRI OPAL_EV_RTE_LO_PRI /* define some common keys used in ORTE */ #define ORTE_DB_DAEMON_VPID "orte.daemon.vpid" diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index aec3f428bf6..71111cdbc6c 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -41,6 +41,7 @@ #include "opal/util/timings.h" #include "opal/runtime/opal.h" #include "opal/threads/threads.h" +#include "opal/runtime/opal_progress_threads.h" #include "orte/util/show_help.h" #include "orte/mca/ess/base/base.h" @@ -209,6 +210,24 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) pmix_server_register(); } + if (ORTE_PROC_IS_APP) { + /* create an asynchronously progressed event base using + * the progress thread support - the utility automatically + * blocks the thread in an event */ + if (NULL == (opal_async_event_base = opal_start_progress_thread("opal", true))) { + return OPAL_ERROR; + } + orte_event_base = opal_async_event_base; + } else { + /* ORTE tools "block" in their own loop over the event + * base, so no progress thread is required */ + orte_event_base = opal_sync_event_base; + } + /* set the number of priorities */ + if (0 < OPAL_EVENT_NUM_PRI) { + opal_event_base_priority_init(orte_event_base, OPAL_EVENT_NUM_PRI); + } + /* open the ESS and select the correct module for this environment */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_ess_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -220,15 +239,6 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } - if (!ORTE_PROC_IS_APP) { - /* ORTE tools "block" in their own loop over the event - * base, so no progress thread is required - apps will - * start their progress thread in ess_base_std_app.c - * at the appropriate point - */ - orte_event_base = opal_event_base; - } - /* initialize the RTE for this environment */ if (ORTE_SUCCESS != (ret = orte_ess.init())) { error = "orte_ess_init"; diff --git a/orte/test/system/opal_hotel.c b/orte/test/system/opal_hotel.c index c0d68d70e01..3922e3da55e 100644 --- a/orte/test/system/opal_hotel.c +++ b/orte/test/system/opal_hotel.c @@ -47,7 +47,7 @@ int main(int argc, char* argv[]) } OBJ_CONSTRUCT(&hotel, opal_hotel_t); - opal_hotel_init(&hotel, NUM_RMS, 3000000, OPAL_EV_SYS_HI_PRI, evict_cbfunc); + opal_hotel_init(&hotel, NUM_RMS, opal_sync_event_base, 3000000, OPAL_EV_SYS_HI_PRI, evict_cbfunc); /* prep the occupants */ for (i=0; i < NUM_OCC; i++) { diff --git a/orte/tools/orte-server/orte-server.c b/orte/tools/orte-server/orte-server.c index 55c9328e186..46ef0b90465 100644 --- a/orte/tools/orte-server/orte-server.c +++ b/orte/tools/orte-server/orte-server.c @@ -238,10 +238,10 @@ int main(int argc, char *argv[]) /* Set signal handlers to catch kill signals so we can properly clean up * after ourselves. */ - opal_event_set(opal_event_base, &term_handler, SIGTERM, OPAL_EV_SIGNAL, + opal_event_set(orte_event_base, &term_handler, SIGTERM, OPAL_EV_SIGNAL, shutdown_callback, NULL); opal_event_add(&term_handler, NULL); - opal_event_set(opal_event_base, &int_handler, SIGINT, OPAL_EV_SIGNAL, + opal_event_set(orte_event_base, &int_handler, SIGINT, OPAL_EV_SIGNAL, shutdown_callback, NULL); opal_event_add(&int_handler, NULL);