diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index 4adf000eb9b..37d9fdd5531 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -86,9 +86,12 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_list_item_t *item) { opal_counted_pointer_t tail; + const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost; item->opal_list_next = &fifo->opal_fifo_ghost; + opal_atomic_wmb (); + do { tail.value = fifo->opal_fifo_tail.value; @@ -99,7 +102,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_atomic_wmb (); - if (&fifo->opal_fifo_ghost == tail.data.item) { + if (ghost == tail.data.item) { /* update the head */ opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}; opal_update_counted_pointer (&fifo->opal_fifo_head, head, item); @@ -116,24 +119,23 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; + opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost; opal_counted_pointer_t head, tail; do { - head.value = fifo->opal_fifo_head.value; + opal_read_counted_pointer (&fifo->opal_fifo_head, &head); tail.value = fifo->opal_fifo_tail.value; opal_atomic_rmb (); item = (opal_list_item_t *) head.data.item; next = (opal_list_item_t *) item->opal_list_next; - if (&fifo->opal_fifo_ghost == tail.data.item && &fifo->opal_fifo_ghost == item) { + if (ghost == tail.data.item && ghost == item) { return NULL; } /* the head or next pointer are in an inconsistent state. keep looping. */ - if (tail.data.item != item && &fifo->opal_fifo_ghost != tail.data.item && - &fifo->opal_fifo_ghost == next) { + if (tail.data.item != item && ghost != tail.data.item && ghost == next) { continue; } @@ -146,14 +148,14 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) opal_atomic_wmb (); /* check for tail and head consistency */ - if (&fifo->opal_fifo_ghost == next) { + if (ghost == next) { /* the head was just set to &fifo->opal_fifo_ghost. try to update the tail as well */ - if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, &fifo->opal_fifo_ghost)) { + if (!opal_update_counted_pointer (&fifo->opal_fifo_tail, tail, ghost)) { /* tail was changed by a push operation. wait for the item's next pointer to be se then * update the head */ /* wait for next pointer to be updated by push */ - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } @@ -166,7 +168,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) head.value = fifo->opal_fifo_head.value; next = (opal_list_item_t *) item->opal_list_next; - assert (&fifo->opal_fifo_ghost == head.data.item); + assert (ghost == head.data.item); fifo->opal_fifo_head.data.item = next; opal_atomic_wmb (); diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index c240280a836..bc6f3f783c6 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -74,8 +74,33 @@ static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t return opal_atomic_cmpset_128 (&addr->value, old.value, new_p.value); } +__opal_attribute_always_inline__ +static inline void opal_read_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *value) +{ + /* most platforms do not read the value atomically so make sure we read the counted pointer in a specific order */ + value->data.counter = addr->data.counter; + opal_atomic_rmb (); + value->data.item = addr->data.item; +} + #endif +/** + * @brief Helper function for lifo/fifo to sleep this thread if excessive contention is detected + */ +static inline void _opal_lifo_release_cpu (void) +{ + /* NTH: there are many ways to cause the current thread to be suspended. This one + * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but + * the interval will be forced to be in ms (instead of ns or us). Note that there + * is a performance improvement for the lifo test when this call is made on detection + * of contention but it may not translate into actually MPI or application performance + * improvements. */ + static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 }; + nanosleep (&interval, NULL); +} + + /* Atomic Last In First Out lists. If we are in a multi-threaded environment then the * atomicity is insured via the compare-and-swap operation, if not we simply do a read * and/or a write. @@ -142,10 +167,8 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) opal_list_item_t *item; do { - - old_head.data.counter = lifo->opal_lifo_head.data.counter; - opal_atomic_rmb (); - old_head.data.item = item = (opal_list_item_t*)lifo->opal_lifo_head.data.item; + opal_read_counted_pointer (&lifo->opal_lifo_head, &old_head); + item = (opal_list_item_t *) old_head.data.item; if (item == &lifo->opal_lifo_ghost) { return NULL; @@ -187,18 +210,6 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo, #if OPAL_HAVE_ATOMIC_LLSC_PTR -static inline void _opal_lifo_release_cpu (void) -{ - /* NTH: there are many ways to cause the current thread to be suspended. This one - * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but - * the interval will be forced to be in ms (instead of ns or us). Note that there - * is a performance improvement for the lifo test when this call is made on detection - * of contention but it may not translate into actually MPI or application performance - * improvements. */ - static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 }; - nanosleep (&interval, NULL); -} - /* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO * is empty so we return NULL. */