|
1 | | -#include <pthread.h> |
2 | | - |
3 | 1 | #include "data.table.h" |
4 | 2 |
|
5 | 3 | struct hash_pair { |
@@ -141,125 +139,3 @@ R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) { |
141 | 139 |
|
142 | 140 | return ifnotfound; // # nocov |
143 | 141 | } |
144 | | - |
145 | | -typedef struct dhashtab_ { |
146 | | - dhashtab public; // must be at offset 0 |
147 | | - size_t size, used, limit; |
148 | | - uintptr_t multiplier; |
149 | | - struct hash_pair *table; |
150 | | -} dhashtab_; |
151 | | - |
152 | | -static void dhash_finalizer(SEXP dhash) { |
153 | | - dhashtab_ * self = R_ExternalPtrAddr(dhash); |
154 | | - if (!self) return; |
155 | | - R_ClearExternalPtr(dhash); |
156 | | - free(self->table); |
157 | | - free(self); |
158 | | -} |
159 | | - |
160 | | -static struct hash_pair * dhash_allocate(size_t n_full) { |
161 | | - if (n_full > SIZE_MAX / sizeof(struct hash_pair)) |
162 | | - internal_error(__func__, "%zu hash table slots would overflow size_t", n_full); // # nocov |
163 | | - struct hash_pair * new = malloc(sizeof(struct hash_pair[n_full])); |
164 | | - if (!new) internal_error(__func__, "failed to malloc() %zu hash table slots", n_full); // # nocov |
165 | | - for (size_t i = 0; i < n_full; ++i) new[i] = (struct hash_pair){.key = NULL}; |
166 | | - return new; |
167 | | -} |
168 | | - |
169 | | -static dhashtab * dhash_create_(size_t n, double load_factor) { |
170 | | - size_t n_full = get_full_size(n, load_factor); |
171 | | - |
172 | | - SEXP prot = PROTECT(R_MakeExternalPtr(NULL, R_NilValue, R_NilValue)); |
173 | | - R_RegisterCFinalizerEx(prot, dhash_finalizer, TRUE); |
174 | | - dhashtab_ * self = malloc(sizeof(dhashtab_)); |
175 | | - if (!self) internal_error(__func__, "failed to malloc() the hash table header"); // # nocov |
176 | | - *self = (dhashtab_){ |
177 | | - .public = { .prot = prot }, |
178 | | - }; |
179 | | - R_SetExternalPtrAddr(prot, self); |
180 | | - |
181 | | - self->table = dhash_allocate(n_full); |
182 | | - self->size = n_full; |
183 | | - self->limit = n; |
184 | | - self->multiplier = n_full * hash_multiplier1; |
185 | | - // this is the last time we're allowed to set the table parts piece by piece |
186 | | - |
187 | | - UNPROTECT(1); |
188 | | - return &self->public; |
189 | | -} |
190 | | - |
191 | | -dhashtab * dhash_create(size_t n) { return dhash_create_(n, .5); } |
192 | | - |
193 | | -static void dhash_enlarge(dhashtab_ * self) { |
194 | | - if (self->size > SIZE_MAX / 2) |
195 | | - internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov |
196 | | - size_t new_size = self->size * 2; |
197 | | - size_t new_mask = new_size - 1; |
198 | | - struct hash_pair * new = dhash_allocate(new_size); |
199 | | - uintptr_t new_multiplier = new_size * hash_multiplier1; |
200 | | - for (size_t i = 0; i < self->size; ++i) { |
201 | | - if (!self->table[i].key) continue; |
202 | | - for (size_t j = 0; j < new_size; ++j) { |
203 | | - size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask; |
204 | | - if (!new[ii].key) { |
205 | | - new[ii] = (struct hash_pair){ |
206 | | - .key = self->table[i].key, |
207 | | - .value = self->table[i].value |
208 | | - }; |
209 | | - break; |
210 | | - } |
211 | | - } |
212 | | - } |
213 | | - // This is thread-unsafe, but this function is only called either from a single-thread context, or |
214 | | - // from under an OpenMP critical section, so there's no reason to worry about another thread |
215 | | - // getting a use-after-free. They are all sleeping. |
216 | | - self->size = new_size; |
217 | | - self->limit *= 2; |
218 | | - self->multiplier = new_multiplier; |
219 | | - self->table = new; |
220 | | - #pragma omp flush |
221 | | -} |
222 | | - |
223 | | -void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) { |
224 | | - dhashtab_ * self = (dhashtab_ *)h; |
225 | | - struct hash_pair *cell, *end; |
226 | | -again: |
227 | | - cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1)); |
228 | | - end = self->table + self->size - 1; |
229 | | - for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) { |
230 | | - if (cell->key == key) { |
231 | | - cell->value = value; |
232 | | - return; |
233 | | - } else if (!cell->key) { |
234 | | - if (self->used < self->limit) { |
235 | | - *cell = (struct hash_pair){ .key = key, .value = value }; |
236 | | - ++self->used; |
237 | | - return; |
238 | | - } |
239 | | - dhash_enlarge(self); |
240 | | - goto again; // won't be needed next time with the limit doubled |
241 | | - } |
242 | | - } |
243 | | - internal_error( // # nocov |
244 | | - __func__, "did not find a free slot for key %p; size=%zu, used=%zu, limit=%zu", |
245 | | - (void*)key, self->size, self->used, self->limit |
246 | | - ); |
247 | | -} |
248 | | - |
249 | | -R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) { |
250 | | - #pragma omp flush // no locking or atomic access! this is bad |
251 | | - dhashtab_ self = *(dhashtab_ *)h; |
252 | | - R_xlen_t ret = ifnotfound; |
253 | | - const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1)); |
254 | | - const struct hash_pair * end = self.table + self.size - 1; |
255 | | - for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) { |
256 | | - if (cell->key == key) { |
257 | | - ret = cell->value; |
258 | | - goto done; |
259 | | - } else if (!cell->key) { |
260 | | - goto done; |
261 | | - } |
262 | | - } |
263 | | -done: |
264 | | - return ret; |
265 | | -} |
0 commit comments