Drop dhashtab

aitap · aitap · commit 66ac645b56cf · 2025-12-04T13:45:50.000+03:00
diff --git a/src/data.table.h b/src/data.table.h
@@ -359,15 +359,6 @@ hashtab *hash_set_shared(hashtab *, SEXP key, R_xlen_t value);
 // Returns the value corresponding to the key present in the hash, otherwise returns ifnotfound.
 R_xlen_t hash_lookup(const hashtab *, SEXP key, R_xlen_t ifnotfound);
 
-// The dynamically-allocated hash table has a public field for the R protection wrapper.
-// Keep it PROTECTed while the table is in use.
-typedef struct dhash_tab {
-  SEXP prot;
-} dhashtab;
-dhashtab * dhash_create(size_t n);
-void dhash_set(dhashtab * h, SEXP key, R_xlen_t value);
-R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound);
-
 // mergelist.c
 SEXP cbindlist(SEXP x, SEXP copyArg);
 SEXP copyCols(SEXP x, SEXP cols);
diff --git a/src/hash.c b/src/hash.c
@@ -1,5 +1,3 @@
-#include <pthread.h>
-
 #include "data.table.h"
 
 struct hash_pair {
@@ -141,125 +139,3 @@ R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
 
   return ifnotfound; // # nocov
 }
-
-typedef struct dhashtab_ {
-  dhashtab public; // must be at offset 0
-  size_t size, used, limit;
-  uintptr_t multiplier;
-  struct hash_pair *table;
-} dhashtab_;
-
-static void dhash_finalizer(SEXP dhash) {
-  dhashtab_ * self = R_ExternalPtrAddr(dhash);
-  if (!self) return;
-  R_ClearExternalPtr(dhash);
-  free(self->table);
-  free(self);
-}
-
-static struct hash_pair * dhash_allocate(size_t n_full) {
-  if (n_full > SIZE_MAX / sizeof(struct hash_pair))
-    internal_error(__func__, "%zu hash table slots would overflow size_t", n_full); // # nocov
-  struct hash_pair * new = malloc(sizeof(struct hash_pair[n_full]));
-  if (!new) internal_error(__func__, "failed to malloc() %zu hash table slots", n_full); // # nocov
-  for (size_t i = 0; i < n_full; ++i) new[i] = (struct hash_pair){.key = NULL};
-  return new;
-}
-
-static dhashtab * dhash_create_(size_t n, double load_factor) {
-  size_t n_full = get_full_size(n, load_factor);
-
-  SEXP prot = PROTECT(R_MakeExternalPtr(NULL, R_NilValue, R_NilValue));
-  R_RegisterCFinalizerEx(prot, dhash_finalizer, TRUE);
-  dhashtab_ * self = malloc(sizeof(dhashtab_));
-  if (!self) internal_error(__func__, "failed to malloc() the hash table header"); // # nocov
-  *self = (dhashtab_){
-    .public = { .prot = prot },
-  };
-  R_SetExternalPtrAddr(prot, self);
-
-  self->table = dhash_allocate(n_full);
-  self->size = n_full;
-  self->limit = n;
-  self->multiplier = n_full * hash_multiplier1;
-  // this is the last time we're allowed to set the table parts piece by piece
-
-  UNPROTECT(1);
-  return &self->public;
-}
-
-dhashtab * dhash_create(size_t n) { return dhash_create_(n, .5); }
-
-static void dhash_enlarge(dhashtab_ * self) {
-  if (self->size > SIZE_MAX / 2)
-    internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov
-  size_t new_size = self->size * 2;
-  size_t new_mask = new_size - 1;
-  struct hash_pair * new = dhash_allocate(new_size);
-  uintptr_t new_multiplier = new_size * hash_multiplier1;
-  for (size_t i = 0; i < self->size; ++i) {
-    if (!self->table[i].key) continue;
-    for (size_t j = 0; j < new_size; ++j) {
-      size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask;
-      if (!new[ii].key) {
-        new[ii] = (struct hash_pair){
-          .key = self->table[i].key,
-          .value = self->table[i].value
-        };
-        break;
-      }
-    }
-  }
-  // This is thread-unsafe, but this function is only called either from a single-thread context, or
-  // from under an OpenMP critical section, so there's no reason to worry about another thread
-  // getting a use-after-free. They are all sleeping.
-  self->size = new_size;
-  self->limit *= 2;
-  self->multiplier = new_multiplier;
-  self->table = new;
-  #pragma omp flush
-}
-
-void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
-  dhashtab_ * self = (dhashtab_ *)h;
-  struct hash_pair *cell, *end;
-again:
-  cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1));
-  end = self->table + self->size - 1;
-  for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
-    if (cell->key == key) {
-      cell->value = value;
-      return;
-    } else if (!cell->key) {
-      if (self->used < self->limit) {
-        *cell = (struct hash_pair){ .key = key, .value = value };
-        ++self->used;
-        return;
-      }
-      dhash_enlarge(self);
-      goto again; // won't be needed next time with the limit doubled
-    }
-  }
-  internal_error( // # nocov
-    __func__, "did not find a free slot for key %p; size=%zu, used=%zu, limit=%zu",
-    (void*)key, self->size, self->used, self->limit
-  );
-}
-
-R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
-  #pragma omp flush // no locking or atomic access! this is bad
-  dhashtab_ self = *(dhashtab_ *)h;
-  R_xlen_t ret = ifnotfound;
-  const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1));
-  const struct hash_pair * end = self.table + self.size - 1;
-  for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
-    if (cell->key == key) {
-      ret = cell->value;
-      goto done;
-    } else if (!cell->key) {
-      goto done;
-    }
-  }
-done:
-  return ret;
-}