1+ #pragma once
2+
#include <bit>
#include <cassert>
#include <climits>
#include <cstddef>
#include <iterator>
#include <type_traits>

#include <openvic-dataloader/detail/Utility.hpp>
10+
11+ namespace ovdl ::detail {
/// A simple hash table for trivial values that resolves collisions with linear probing.
///
/// It is non-owning as it does not store the used memory resource: `rehash()` and `free()`
/// take the resource as an argument, and the class declares no destructor, so the storage
/// is only released by an explicit call to `free()`.
///
/// `Traits` supplies `value_type` and the operations used below: `hash(key)`,
/// `is_equal(entry, key)`, `is_unoccupied(entry)`, `fill_unoccupied(ptr, count)` and
/// `fill_removed(ptr, count)`.
///
/// The capacity is used as a bit mask while probing, so it has to be a power of two.
/// `to_table_capacity()` returns `MinTableSize` unchanged for small requests, which means
/// `MinTableSize` itself should be a power of two (or zero).
template <typename Traits, std::size_t MinTableSize>
class HashTable {
public:
	using value_type = typename Traits::value_type;
	static_assert(std::is_trivial_v<value_type>);

	constexpr HashTable() = default;

	/// Hands the table storage back to `resource` and resets the table to its initial,
	/// storage-less state. Does nothing if no storage is currently allocated.
	template <typename ResourcePtr>
	void free(ResourcePtr resource) {
		if (_table_capacity == 0) {
			return;
		}

		resource->deallocate(_table, _table_capacity * sizeof(value_type), alignof(value_type));
		_table = nullptr;
		_table_size = 0;
		_table_capacity = 0;
	}

	/// A reference to one slot of the table together with the information whether that
	/// slot currently holds a value (`_valid`).
	struct entry_handle {
		HashTable* _self;
		value_type* _entry;
		bool _valid;

		/// True if the slot holds a value.
		explicit operator bool() const {
			return _valid;
		}

		/// Position of the slot inside the table storage.
		std::size_t index() const {
			return std::size_t(_entry - _self->_table);
		}

		/// The stored value; the handle must be valid.
		value_type& get() const {
			assert(*this);
			return *_entry;
		}

		/// Writes `value` into the (currently invalid) slot and counts it as an element.
		void create(const value_type& value) {
			assert(!*this);
			*_entry = value;
			++_self->_table_size;
			_valid = true;
		}

		/// Marks the (currently valid) slot via `Traits::fill_removed` and uncounts it.
		void remove() {
			assert(*this);
			Traits::fill_removed(_entry, 1);
			--_self->_table_size;
			_valid = false;
		}
	};

	/// Looks for an entry in the table, locating a slot for it if necessary.
	///
	/// If an equal entry is already in the table, returns a valid handle to it.
	///
	/// Otherwise returns an invalid handle to the first unoccupied slot of the probe
	/// sequence; the caller is expected to fill it with `entry_handle::create()`.
	///
	/// Requires at least one free slot (`size() < capacity()`).
	template <typename Key>
	entry_handle lookup_entry(const Key& key, Traits traits = {}) {
		// Without a free slot the probe loop below would never terminate for a missing key.
		assert(_table_size < _table_capacity);

		// The capacity is a power of two, so masking is equivalent to a modulo.
		const std::size_t mask = _table_capacity - 1;
		std::size_t slot = traits.hash(key) & mask;

		while (true) {
			value_type* const entry = _table + slot;
			if (Traits::is_unoccupied(*entry)) {
				// Reached a free slot: the key is not in the table.
				return { this, entry, false };
			}

			if (traits.is_equal(*entry, key)) {
				// It is already in the table, return it.
				return { this, entry, true };
			}

			// Linear probing: advance to the next slot, wrapping around at the end.
			slot = (slot + 1) & mask;
		}
	}

	/// Returns a pointer to the entry equal to `key`, or `nullptr` if there is none.
	template <typename Key>
	value_type* lookup(const Key& key, Traits traits = {}) const {
		// Also covers the table without storage, which lookup_entry() must not be called on.
		if (_table_size == 0) {
			return nullptr;
		}

		// lookup_entry() itself does not modify the table; only the returned handle could.
		auto entry = const_cast<HashTable*>(this)->lookup_entry(key, traits);
		return entry ? &entry.get() : nullptr;
	}

	/// True once at least half of the slots are in use (also true for a table without storage).
	bool should_rehash() const {
		return _table_size >= _table_capacity / 2;
	}

	/// Maps a requested capacity to the capacity the table would actually use:
	/// `MinTableSize` for requests below it, otherwise the request rounded up to the next
	/// power of two. A request of zero (only reachable when `MinTableSize` is zero) yields 1.
	static constexpr std::size_t to_table_capacity(unsigned long long cap) {
		if (cap < MinTableSize) {
			return MinTableSize;
		}

		// Highest power of two representable in std::size_t; results are clamped to it so
		// that the doubling below can neither overflow nor loop forever.
		constexpr std::size_t max_capacity = ~(~std::size_t(0) >> 1);

		// Round up to next power of two. Doubling avoids any dependence on the relative
		// widths of `unsigned long long` and `std::size_t` and has no undefined shift.
		std::size_t result = 1;
		while (result < cap && result < max_capacity) {
			result <<= 1;
		}
		return result;
	}

	/// Grows the table to `new_capacity` slots, re-inserting every existing entry.
	///
	/// `new_capacity` must be a value produced by `to_table_capacity()`. Nothing happens if
	/// it does not exceed the current capacity. For each moved entry, `entry_cb` is invoked
	/// with the handle of the entry in the new table and its index in the old table.
	/// The old storage is returned to `resource` afterwards.
	template <typename ResourcePtr, typename Callback = void (*)(entry_handle, std::size_t)>
	void rehash(
		ResourcePtr resource, std::size_t new_capacity, Traits traits = {},
		Callback entry_cb = +[](entry_handle, std::size_t) {}) {
		assert(new_capacity == to_table_capacity(new_capacity));
		if (new_capacity <= _table_capacity) {
			return;
		}

		value_type* const old_table = _table;
		const std::size_t old_capacity = _table_capacity;

		// Allocate a bigger, currently empty table.
		_table = static_cast<value_type*>(
			resource->allocate(new_capacity * sizeof(value_type), alignof(value_type)));
		_table_capacity = new_capacity;
		Traits::fill_unoccupied(_table, _table_capacity);

		// Insert existing values into the new table.
		if (_table_size > 0) {
			// create() counts every re-inserted entry again.
			_table_size = 0;

			value_type* const old_end = old_table + old_capacity;
			for (value_type* entry = old_table; entry != old_end; ++entry) {
				if (Traits::is_unoccupied(*entry)) {
					continue;
				}

				auto new_entry = lookup_entry(*entry, traits);
				new_entry.create(*entry);
				entry_cb(new_entry, std::size_t(entry - old_table));
			}
		}

		if (old_capacity > 0) {
			resource->deallocate(old_table, old_capacity * sizeof(value_type), alignof(value_type));
		}
	}

	/// Grows the table to the capacity `to_table_capacity()` yields for twice the current one.
	template <typename ResourcePtr, typename Callback = void (*)(entry_handle, std::size_t)>
	void rehash(
		ResourcePtr resource, Traits traits = {},
		Callback entry_cb = +[](entry_handle, std::size_t) {}) {
		rehash(resource, to_table_capacity(2 * _table_capacity), traits, entry_cb);
	}

	//=== access ===//
	/// Number of entries currently counted as stored.
	std::size_t size() const {
		return _table_size;
	}
	/// Number of slots in the current storage (zero if none is allocated).
	std::size_t capacity() const {
		return _table_capacity;
	}

	/// A range over the occupied slots of a table; its iterators yield `entry_handle`s.
	struct entry_range {
		struct iterator {
			using value_type = std::remove_cv_t<entry_handle>;
			using reference = entry_handle;
			// operator* returns by value, so operator-> needs a proxy that keeps the handle alive.
			struct pointer {
				value_type value;

				constexpr value_type* operator->() noexcept {
					return &value;
				}
			};
			using difference_type = std::ptrdiff_t;
			using iterator_category = std::forward_iterator_tag;

			constexpr reference operator*() const noexcept {
				return deref();
			}
			constexpr pointer operator->() const noexcept {
				return pointer { **this };
			}

			constexpr iterator& operator++() noexcept {
				increment();
				return *this;
			}
			constexpr iterator operator++(int) noexcept {
				iterator copy = *this;
				increment();
				return copy;
			}

			friend constexpr bool operator==(const iterator& lhs, const iterator& rhs) {
				return lhs.equal(rhs);
			}
			friend constexpr bool operator!=(const iterator& lhs, const iterator& rhs) {
				return !lhs.equal(rhs);
			}

			HashTable* _self;
			HashTable::value_type* _cur;

			constexpr iterator() : _self(nullptr), _cur(nullptr) {}
			constexpr explicit iterator(HashTable& self, HashTable::value_type* cur)
				: _self(&self), _cur(cur) {}

			/// Handle for the current slot; iterators only ever rest on occupied slots.
			constexpr entry_handle deref() const {
				return { _self, _cur, true };
			}
			/// Advances to the next occupied slot, or to one past the last slot.
			constexpr void increment() {
				const auto end = _self->_table + _self->_table_capacity;
				do {
					++_cur;
				} while (_cur != end && Traits::is_unoccupied(*_cur));
			}
			/// Iterators compare equal when they point at the same slot.
			constexpr bool equal(iterator rhs) const {
				return _cur == rhs._cur;
			}
		};

		/// Iterator to the first occupied slot; a default iterator if the table is empty.
		iterator begin() const {
			if (_self->size() == 0) {
				return {};
			}

			// size() > 0 means an occupied slot exists, so this scan stops inside the table.
			auto cur = _self->_table;
			while (Traits::is_unoccupied(*cur)) {
				++cur;
			}
			return iterator(*_self, cur);
		}
		/// Iterator one past the last slot; a default iterator if the table is empty.
		iterator end() const {
			if (_self->size() == 0) {
				return {};
			}

			return iterator(*_self, _self->_table + _self->_table_capacity);
		}

		HashTable* _self;
	};

	/// Iterates over all occupied entries.
	entry_range entries() {
		return { this };
	}

private:
	value_type* _table = nullptr;
	std::size_t _table_capacity = 0; // power of two
	std::size_t _table_size = 0;
};
263+ }
0 commit comments