Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions include/openvic-dataloader/Error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
#include <cstdint>
#include <string_view>

#include <openvic-dataloader/detail/SymbolIntern.hpp>
#include <openvic-dataloader/detail/Utility.hpp>

#include <dryad/abstract_node.hpp>
#include <dryad/node.hpp>
#include <dryad/symbol.hpp>

namespace ovdl {
template<typename>
Expand Down Expand Up @@ -51,12 +51,7 @@ namespace ovdl::error {
LastAnnotation = SecondaryAnnotation,
};

struct ErrorSymbolInterner {
struct SymbolId;
using index_type = std::uint32_t;
using symbol_type = dryad::symbol<SymbolId, index_type>;
using symbol_interner_type = dryad::symbol_interner<SymbolId, char, index_type>;
};
/// Symbol-interner type bundle used by the error nodes (see `Error::message`).
/// Adds nothing of its own: every member comes from the `SymbolIntern` base.
struct ErrorSymbolInterner : SymbolIntern {};

static constexpr std::string_view get_kind_name(ErrorKind kind) {
switch (kind) {
Expand All @@ -70,7 +65,7 @@ namespace ovdl::error {
}

struct Error : dryad::abstract_node_all<ErrorKind> {
const char* message(const ErrorSymbolInterner::symbol_interner_type& symbols) const { return _message.c_str(symbols); }
/// Returns the error message as a C string, read directly from `_message`.
/// The `symbols` argument is not consulted by this implementation.
const char* message(const ErrorSymbolInterner::symbol_interner_type& symbols) const { return _message.c_str(); }

protected:
DRYAD_ABSTRACT_NODE_CTOR(Error);
Expand Down
2 changes: 0 additions & 2 deletions include/openvic-dataloader/csv/Parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#include <openvic-dataloader/detail/Encoding.hpp>
#include <openvic-dataloader/detail/ErrorRange.hpp>

#include <dryad/node.hpp>

namespace ovdl::csv {
class Parser final : public detail::BasicParser {
public:
Expand Down
2 changes: 0 additions & 2 deletions include/openvic-dataloader/detail/ErrorRange.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

#include <utility>

#include <dryad/node.hpp>

namespace ovdl::detail {
/// The type returned by calling `errors()` on a const `Root` object.
template<typename Root>
using error_range = decltype((*std::declval<const Root*>()).errors());
Expand Down
53 changes: 53 additions & 0 deletions include/openvic-dataloader/detail/HashAlgorithm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#pragma once

#include <cstdint>
#include <type_traits>

namespace ovdl::detail {
/// FNV-1a 64 bit hash.
///
/// Builder-style hasher: start from a default-constructed object, feed data through the
/// `hash_*` functions (each returns the hasher as an rvalue so calls can be chained), and
/// obtain the result with `finish()`, which is only callable on an rvalue.
/// The type is move-only because only the move operations are declared.
class DefaultHash {
	static constexpr std::uint64_t fnv_basis = 14695981039346656037ull;
	static constexpr std::uint64_t fnv_prime = 1099511628211ull;

public:
	/// Starts a fresh hash at the FNV offset basis.
	explicit DefaultHash() : _hash(fnv_basis) {}

	DefaultHash(DefaultHash&&) = default;
	DefaultHash& operator=(DefaultHash&&) = default;

	~DefaultHash() = default;

	/// Mixes `size` bytes starting at `ptr` into the hash.
	/// `ptr` is not dereferenced when `size` is zero.
	DefaultHash&& hash_bytes(const unsigned char* ptr, std::size_t size) {
		// The index has the same type as `size`; a narrower unsigned counter would wrap
		// around and never reach `size` for inputs larger than its maximum value.
		for (std::size_t i = 0; i != size; ++i) {
			_hash ^= ptr[i];
			_hash *= fnv_prime;
		}
		return static_cast<DefaultHash&&>(*this);
	}

	/// Mixes the object representation of a scalar into the hash.
	/// The bytes are consumed in memory order, so for multi-byte types the result depends
	/// on the platform's byte order. Floating point types are rejected at compile time.
	template<typename T>
	requires std::is_scalar_v<T>
	DefaultHash&& hash_scalar(T value) {
		static_assert(!std::is_floating_point_v<T>,
			"you shouldn't use floats as keys for a hash table");
		// Cast to a const byte pointer so an explicitly const-qualified T also compiles.
		hash_bytes(reinterpret_cast<const unsigned char*>(&value), sizeof(T));
		return static_cast<DefaultHash&&>(*this);
	}

	/// Mixes every character of a null-terminated string into the hash, one
	/// `hash_scalar` call per character; the terminator itself is not hashed.
	template<typename CharT>
	DefaultHash&& hash_c_str(const CharT* str) {
		for (; *str != CharT(); ++str) {
			hash_scalar(*str);
		}
		return static_cast<DefaultHash&&>(*this);
	}

	/// Returns the accumulated 64 bit hash value.
	std::uint64_t finish() && {
		return _hash;
	}

private:
	std::uint64_t _hash;
};
}
263 changes: 263 additions & 0 deletions include/openvic-dataloader/detail/HashTable.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
#pragma once

#include <bit>
#include <cassert>
#include <climits>
#include <cstddef>
#include <iterator>
#include <type_traits>

#include <openvic-dataloader/detail/Utility.hpp>

namespace ovdl::detail {
/// A simple hash table for trivial keys with linear probing.
/// It is non-owning as it does not store the used memory resource: every function that
/// allocates or releases memory takes the resource as an argument, and the table has no
/// destructor, so `free` has to be called explicitly.
///
/// `Traits` has to provide, as used by this class:
/// - `value_type`, the trivial type stored in each slot,
/// - `hash(key)` and `is_equal(entry, key)`, called on a `Traits` object,
/// - static `is_unoccupied(entry)`, `fill_unoccupied(ptr, count)` and `fill_removed(ptr, count)`.
///
/// `MinTableSize` is the smallest capacity returned by `to_table_capacity`.
/// NOTE(review): slots are selected with `hash & (capacity - 1)`, so `MinTableSize` is
/// presumably expected to be a power of two - confirm at the instantiation sites.
template<typename Traits, std::size_t MinTableSize>
class HashTable {
public:
	using value_type = typename Traits::value_type;
	static_assert(std::is_trivial_v<value_type>);

	constexpr HashTable() = default;

	/// Hands the slot array back to `resource` and resets the table to the empty state.
	/// Does nothing when no table is currently allocated.
	template<typename ResourcePtr>
	void free(ResourcePtr resource) {
		if (_table_capacity == 0) {
			return;
		}

		resource->deallocate(_table, _table_capacity * sizeof(value_type), alignof(value_type));
		_table = nullptr;
		_table_size = 0;
		_table_capacity = 0;
	}

	/// Refers to one slot of the table together with whether it currently holds a value.
	struct entry_handle {
		HashTable* _self;
		value_type* _entry;
		bool _valid;

		/// True if the slot holds a value.
		explicit operator bool() const {
			return _valid;
		}

		/// Position of the slot inside the table.
		std::size_t index() const {
			return std::size_t(_entry - _self->_table);
		}

		/// The stored value; the handle must be valid.
		value_type& get() const {
			assert(*this);
			return *_entry;
		}

		/// Stores `value` in the slot and counts it; the handle must not be valid yet.
		void create(const value_type& value) {
			assert(!*this);
			*_entry = value;
			++_self->_table_size;
			_valid = true;
		}

		/// Overwrites the slot with the traits' "removed" marker and uncounts it;
		/// the handle must be valid.
		void remove() {
			assert(*this);
			Traits::fill_removed(_entry, 1);
			--_self->_table_size;
			_valid = false;
		}
	};

	// Looks for an entry in the table, creating one if necessary.
	//
	// If it is already in the table, returns a handle to its valid entry.
	//
	// Otherwise, locates a new entry for that value and returns a handle to it which is currently
	// invalid. Invariants of map are broken until the entry has been written to.
	//
	// Requires size() < capacity(), which is asserted before probing starts.
	template<typename Key>
	entry_handle lookup_entry(const Key& key, Traits traits = {}) {
		assert(_table_size < _table_capacity);

		// The capacity is a power of two, so masking wraps the index around the table.
		const auto mask = _table_capacity - 1;
		auto table_idx = traits.hash(key) & mask;

		while (true) {
			auto entry = _table + table_idx;
			if (Traits::is_unoccupied(*entry)) {
				// We found an empty entry, return it.
				return { this, entry, false };
			}

			// Check whether the entry holds the same key.
			if (traits.is_equal(*entry, key)) {
				// It is already in the table, return it.
				return { this, entry, true };
			}

			// Go to next entry.
			table_idx = (table_idx + 1) & mask;
		}
	}

	/// Returns a pointer to the stored value equal to `key`, or nullptr if there is none.
	/// Never inserts anything.
	template<typename Key>
	value_type* lookup(const Key& key, Traits traits = {}) const {
		if (_table_size == 0) {
			return nullptr;
		}

		// lookup_entry only mutates the table through the returned handle, which is
		// merely read here, so casting away const does not modify the table.
		auto entry = const_cast<HashTable*>(this)->lookup_entry(key, traits);
		return entry ? &entry.get() : nullptr;
	}

	/// True once at least half of the slots are in use (also true for an unallocated table).
	bool should_rehash() const {
		return _table_size >= _table_capacity / 2;
	}

	/// Rounds `cap` up to the capacity the table would actually use: `MinTableSize` if
	/// `cap` is smaller than that, otherwise the next power of two that is >= `cap`.
	static constexpr std::size_t to_table_capacity(unsigned long long cap) {
		if (cap < MinTableSize) {
			return MinTableSize;
		}
		if (cap <= 1) {
			// Only reachable with MinTableSize <= 1; avoids `cap - 1` wrapping around for 0.
			return 1;
		}

		// Round up to next power of two. The bit width must belong to the very type whose
		// leading zeros are counted, otherwise the shift is wrong wherever std::size_t and
		// unsigned long long differ in size.
		constexpr int bit_width = int(sizeof(unsigned long long) * CHAR_BIT);
		return std::size_t(1) << (bit_width - std::countl_zero(cap - 1));
	}

	/// Grows the table to `new_capacity` slots allocated from `resource` and re-inserts all
	/// occupied entries. `new_capacity` must be a value produced by `to_table_capacity`;
	/// nothing happens if it is not larger than the current capacity.
	///
	/// `entry_cb` is called once per moved entry with the handle of its new slot and the
	/// index it had in the old table. The old slot array is deallocated afterwards.
	template<typename ResourcePtr, typename Callback = void (*)(entry_handle, std::size_t)>
	void rehash(
		ResourcePtr resource, std::size_t new_capacity, Traits traits = {},
		Callback entry_cb = +[](entry_handle, std::size_t) {}) {
		assert(new_capacity == to_table_capacity(new_capacity));
		if (new_capacity <= _table_capacity) {
			return;
		}

		auto old_table = _table;
		auto old_capacity = _table_capacity;

		// Allocate a bigger, currently empty table.
		_table = static_cast<value_type*>(
			resource->allocate(new_capacity * sizeof(value_type), alignof(value_type)));
		_table_capacity = new_capacity;
		Traits::fill_unoccupied(_table, _table_capacity);

		// Insert existing values into the new table.
		if (_table_size > 0) {
			// create() counts every re-inserted entry again.
			_table_size = 0;

			for (auto entry = old_table; entry != old_table + old_capacity; ++entry) {
				if (!Traits::is_unoccupied(*entry)) {
					auto new_entry = lookup_entry(*entry, traits);
					new_entry.create(*entry);
					entry_cb(new_entry, std::size_t(entry - old_table));
				}
			}
		}

		if (old_capacity > 0) {
			resource->deallocate(old_table, old_capacity * sizeof(value_type), alignof(value_type));
		}
	}

	/// Grows the table to twice its current capacity (or to the minimum capacity if it
	/// has none yet); see the overload taking an explicit capacity.
	template<typename ResourcePtr, typename Callback = void (*)(entry_handle, std::size_t)>
	void rehash(
		ResourcePtr resource, Traits traits = {},
		Callback entry_cb = +[](entry_handle, std::size_t) {}) {
		rehash(resource, to_table_capacity(2 * _table_capacity), traits, entry_cb);
	}

	//=== access ===//
	/// Number of stored values.
	std::size_t size() const {
		return _table_size;
	}
	/// Number of slots in the table.
	std::size_t capacity() const {
		return _table_capacity;
	}

	/// Range over all occupied slots, yielding a valid `entry_handle` for each.
	struct entry_range {
		struct iterator {
			using value_type = std::remove_cv_t<entry_handle>;
			using reference = entry_handle;
			// Handles are produced on the fly, so operator-> returns this proxy that
			// keeps the handle alive for the duration of the member access.
			struct pointer {
				value_type value;

				constexpr value_type* operator->() noexcept {
					return &value;
				}
			};
			using difference_type = std::ptrdiff_t;
			using iterator_category = std::forward_iterator_tag;

			constexpr reference operator*() const noexcept {
				return deref();
			}
			constexpr pointer operator->() const noexcept {
				return pointer { **this };
			}

			constexpr iterator& operator++() noexcept {
				increment();
				return *this;
			}
			constexpr iterator operator++(int) noexcept {
				auto copy = *this;
				increment();
				return copy;
			}

			friend constexpr bool operator==(const iterator& lhs, const iterator& rhs) {
				return lhs.equal(rhs);
			}
			friend constexpr bool operator!=(const iterator& lhs, const iterator& rhs) {
				return !lhs.equal(rhs);
			}

			HashTable* _self;
			HashTable::value_type* _cur;

			iterator() : _self(nullptr), _cur(nullptr) {}
			explicit iterator(HashTable& self, HashTable::value_type* cur)
				: _self(&self), _cur(cur) {}

			/// Handle for the current slot, which is occupied by construction.
			entry_handle deref() const {
				return { _self, _cur, true };
			}
			/// Advances to the next occupied slot, or to one past the last slot.
			void increment() {
				auto end = _self->_table + _self->_table_capacity;
				do {
					++_cur;
				} while (_cur != end && Traits::is_unoccupied(*_cur));
			}
			bool equal(iterator rhs) const {
				return _cur == rhs._cur;
			}
		};

		/// Iterator to the first occupied slot; a default-constructed iterator if the
		/// table holds no values (which then compares equal to `end()`).
		iterator begin() const {
			if (_self->size() == 0) {
				return {};
			}

			// size() > 0 guarantees that an occupied slot exists, so this terminates.
			auto cur = _self->_table;
			while (Traits::is_unoccupied(*cur)) {
				++cur;
			}
			return iterator(*_self, cur);
		}
		/// Iterator one past the last slot; default-constructed if the table holds no values.
		iterator end() const {
			if (_self->size() == 0) {
				return {};
			}

			return iterator(*_self, _self->_table + _self->_table_capacity);
		}

		HashTable* _self;
	};

	/// Iterates over all occupied entries.
	entry_range entries() {
		return { this };
	}

private:
	value_type* _table = nullptr;
	std::size_t _table_capacity = 0; // power of two
	std::size_t _table_size = 0;
};
}
Loading