|
| 1 | +From b65f2c16f389d82b54404df97eed838630b1ee90 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Szabolcs Nagy <szabolcs dot nagy at arm dot com> |
| 3 | +Date: Wed, 30 Nov 2016 11:44:32 +0000 |
| 4 | +Subject: [PATCH 2/5] Fix data races between pthread_create and dlopen |
| 5 | + |
| 6 | +This fixes a subset of the issues described in |
| 7 | +https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html |
| 8 | +without adding locks to pthread_create. |
| 9 | + |
| 10 | +Only races between dlopen and pthread_create were considered, |
| 11 | +and the asserts got removed that tried to check for concurrency |
| 12 | +issues. |
| 13 | + |
| 14 | +The patch is incomplete because dlclose, tls access and |
| 15 | +dl_iterate_phdr related code paths are not modified. |
| 16 | + |
| 17 | +dlclose should be updated in a similar fashion to dlopen |
| 18 | +to make the patch complete alternatively pthread_create |
| 19 | +may take the GL(dl_load_write_lock) to sync with dlclose |
| 20 | +or the GL(dl_load_lock) to sync with dlopen and dlclose |
| 21 | +(that would simplify the concurrency design, but increase |
| 22 | +lock contention on the locks). |
| 23 | + |
| 24 | +2016-11-30 Szabolcs Nagy < [email protected]> |
| 25 | + |
| 26 | + [BZ #19329] |
| 27 | + * elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation) |
| 28 | + atomically. |
| 29 | + * elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation), |
| 30 | + GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically. |
| 31 | + Remove assertions that cannot be guaranteed. |
| 32 | + (_dl_add_to_slotinfo): Write the slotinfo entries and listp->next |
| 33 | + atomically. |
| 34 | +--- |
| 35 | + elf/dl-open.c | 12 +++++-- |
| 36 | + elf/dl-tls.c | 87 ++++++++++++++++++++++++++++++++++++++++++++------- |
| 37 | + 2 files changed, 86 insertions(+), 13 deletions(-) |
| 38 | + |
| 39 | +diff --git a/elf/dl-open.c b/elf/dl-open.c |
| 40 | +index cec54db413..a45319e5fc 100644 |
| 41 | +--- a/elf/dl-open.c |
| 42 | ++++ b/elf/dl-open.c |
| 43 | +@@ -524,9 +524,17 @@ dl_open_worker (void *a) |
| 44 | + } |
| 45 | + |
| 46 | + /* Bump the generation number if necessary. */ |
| 47 | +- if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0)) |
| 48 | +- _dl_fatal_printf (N_("\ |
| 49 | ++ if (any_tls) |
| 50 | ++ { |
| 51 | ++ /* This cannot be in a data-race so non-atomic load is valid too. */ |
| 52 | ++ size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; |
| 53 | ++ /* Synchronize with _dl_allocate_tls_init (see notes there) and |
| 54 | ++ avoid storing an overflowed counter. */ |
| 55 | ++ if (__builtin_expect (newgen == 0, 0)) |
| 56 | ++ _dl_fatal_printf (N_("\ |
| 57 | + TLS generation counter wrapped! Please report this.")); |
| 58 | ++ atomic_store_release (&GL(dl_tls_generation), newgen); |
| 59 | ++ } |
| 60 | + |
| 61 | + /* We need a second pass for static tls data, because _dl_update_slotinfo |
| 62 | + must not be run while calls to _dl_add_to_slotinfo are still pending. */ |
| 63 | +diff --git a/elf/dl-tls.c b/elf/dl-tls.c |
| 64 | +index 4daf88af6e..c60bbd72ea 100644 |
| 65 | +--- a/elf/dl-tls.c |
| 66 | ++++ b/elf/dl-tls.c |
| 67 | +@@ -438,6 +438,36 @@ _dl_resize_dtv (dtv_t *dtv) |
| 68 | + } |
| 69 | + |
| 70 | + |
| 71 | ++/* |
| 72 | ++CONCURRENCY NOTES |
| 73 | ++ |
| 74 | ++dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, |
| 75 | ++which may be concurrently read by pthread_create and tls access without taking |
| 76 | ++the lock, so atomic access should be used. The shared state: |
| 77 | ++ |
| 78 | ++ GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). |
| 79 | ++ GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. |
| 80 | ++ GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count |
| 81 | ++ and link_map for each module with a modid. |
| 82 | ++ |
| 83 | ++A module gets a modid assigned if it has tls, a modid identifies a slotinfo |
| 84 | ++entry and it is the index of the corresponding dtv slot. The generation count |
| 85 | ++is assigned to slotinfo entries of a newly loaded or unloaded module and its |
| 86 | ++newly loaded or unloaded dependencies. |
| 87 | ++ |
| 88 | ++TODO: dlclose may free memory read by a concurrent pthread_create or tls |
| 89 | ++access. This is broken now, so it is assumed that dlclose does not free |
| 90 | ++link_map structures while pthread_create or __tls_get_addr is reading them. |
| 91 | ++ |
| 92 | ++pthread_create calls _dl_allocate_tls_init (before creating the new thread), |
| 93 | ++which should guarantee that the dtv is in a consistent state at the end: |
| 94 | ++ |
| 95 | ++All slotinfo updates with generation <= dtv[0].counter are reflected in the |
| 96 | ++dtv and arbitrary later module unloads may also be reflected as unallocated |
| 97 | ++entries. (Note: a modid reuse implies a module unload and accessing tls in |
| 98 | ++an unloaded module is undefined.) |
| 99 | ++*/ |
| 100 | ++ |
| 101 | + void * |
| 102 | + internal_function |
| 103 | + _dl_allocate_tls_init (void *result) |
| 104 | +@@ -450,12 +480,24 @@ _dl_allocate_tls_init (void *result) |
| 105 | + struct dtv_slotinfo_list *listp; |
| 106 | + size_t total = 0; |
| 107 | + size_t maxgen = 0; |
| 108 | ++ /* Synchronizes with the increments in dl_{open,close}_worker. |
| 109 | ++ Slotinfo updates of this generation are sequenced before the |
| 110 | ++ write we read from here. */ |
| 111 | ++ size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); |
| 112 | ++ /* Either reads from the last write that is sequenced before the |
| 113 | ++ generation counter increment we synchronized with or a write |
| 114 | ++ made by a later dlopen/dlclose. dlclose may decrement this, |
| 115 | ++ but only if related modules are unloaded. So it is an upper |
| 116 | ++ bound on non-unloaded modids up to gen_count generation. */ |
| 117 | ++ size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); |
| 118 | + |
| 119 | + /* Check if the current dtv is big enough. */ |
| 120 | +- if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) |
| 121 | ++ if (dtv[-1].counter < dtv_slots) |
| 122 | + { |
| 123 | + /* Resize the dtv. */ |
| 124 | + dtv = _dl_resize_dtv (dtv); |
| 125 | ++ /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ |
| 126 | ++ dtv_slots = dtv[-1].counter; |
| 127 | + |
| 128 | + /* Install this new dtv in the thread data structures. */ |
| 129 | + INSTALL_DTV (result, &dtv[-1]); |
| 130 | +@@ -472,22 +514,33 @@ _dl_allocate_tls_init (void *result) |
| 131 | + for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) |
| 132 | + { |
| 133 | + struct link_map *map; |
| 134 | ++ size_t gen; |
| 135 | + void *dest; |
| 136 | + |
| 137 | + /* Check for the total number of used slots. */ |
| 138 | +- if (total + cnt > GL(dl_tls_max_dtv_idx)) |
| 139 | ++ if (total + cnt > dtv_slots) |
| 140 | + break; |
| 141 | + |
| 142 | +- map = listp->slotinfo[cnt].map; |
| 143 | ++ /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ |
| 144 | ++ map = atomic_load_acquire (&listp->slotinfo[cnt].map); |
| 145 | + if (map == NULL) |
| 146 | + /* Unused entry. */ |
| 147 | + continue; |
| 148 | + |
| 149 | ++ /* Consistent generation count with the map read above. |
| 150 | ++ Inconsistent gen may be read if the entry is being reused, |
| 151 | ++ in which case it is larger than gen_count and we skip it. */ |
| 152 | ++ gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); |
| 153 | ++ if (gen > gen_count) |
| 154 | ++ /* New entry. */ |
| 155 | ++ continue; |
| 156 | ++ |
| 157 | + /* Keep track of the maximum generation number. This might |
| 158 | + not be the generation counter. */ |
| 159 | +- assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); |
| 160 | +- maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); |
| 161 | ++ maxgen = MAX (maxgen, gen); |
| 162 | + |
| 163 | ++ /* TODO: concurrent dlclose may free map which would break |
| 164 | ++ the rest of the code below. */ |
| 165 | + dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; |
| 166 | + dtv[map->l_tls_modid].pointer.to_free = NULL; |
| 167 | + |
| 168 | +@@ -517,11 +570,15 @@ _dl_allocate_tls_init (void *result) |
| 169 | + } |
| 170 | + |
| 171 | + total += cnt; |
| 172 | +- if (total >= GL(dl_tls_max_dtv_idx)) |
| 173 | ++ if (total >= dtv_slots) |
| 174 | + break; |
| 175 | + |
| 176 | +- listp = listp->next; |
| 177 | +- assert (listp != NULL); |
| 178 | ++ /* Synchronize with dl_add_to_slotinfo. */ |
| 179 | ++ listp = atomic_load_acquire (&listp->next); |
| 180 | ++ /* dtv_slots is an upper bound on the number of entries we care |
| 181 | ++ about, the list may end sooner. */ |
| 182 | ++ if (listp == NULL) |
| 183 | ++ break; |
| 184 | + } |
| 185 | + |
| 186 | + /* The DTV version is up-to-date now. */ |
| 187 | +@@ -922,7 +979,7 @@ _dl_add_to_slotinfo (struct link_map *l) |
| 188 | + the first slot. */ |
| 189 | + assert (idx == 0); |
| 190 | + |
| 191 | +- listp = prevp->next = (struct dtv_slotinfo_list *) |
| 192 | ++ listp = (struct dtv_slotinfo_list *) |
| 193 | + malloc (sizeof (struct dtv_slotinfo_list) |
| 194 | + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); |
| 195 | + if (listp == NULL) |
| 196 | +@@ -937,9 +994,17 @@ _dl_add_to_slotinfo (struct link_map *l) |
| 197 | + listp->next = NULL; |
| 198 | + memset (listp->slotinfo, '\0', |
| 199 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); |
| 200 | ++ /* Add the new list item and synchronize with _dl_allocate_tls_init. */ |
| 201 | ++ atomic_store_release (&prevp->next, listp); |
| 202 | + } |
| 203 | + |
| 204 | + /* Add the information into the slotinfo data structure. */ |
| 205 | +- listp->slotinfo[idx].map = l; |
| 206 | +- listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; |
| 207 | ++ |
| 208 | ++ /* This cannot be in a data-race so non-atomic load would be valid too. */ |
| 209 | ++ size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; |
| 210 | ++ /* TODO: Concurrent readers may see an overflowed gen, which is bad, |
| 211 | ++ but overflow is guaranteed to crash the dlopen that is executing. */ |
| 212 | ++ atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); |
| 213 | ++ /* Synchronize with _dl_allocate_tls_init (see notes there). */ |
| 214 | ++ atomic_store_release (&listp->slotinfo[idx].map, l); |
| 215 | + } |
| 216 | +-- |
| 217 | +2.39.5 |
| 218 | + |
0 commit comments