Skip to content

Commit 9e184d1

Browse files
committed
WIP
1 parent a1d04ae commit 9e184d1

File tree

15 files changed

+6132
-3
lines changed

15 files changed

+6132
-3
lines changed

src/tools/fdsdump/src/3rd_party/xxhash/xxhash.h

Lines changed: 5580 additions & 0 deletions
Large diffs are not rendered by default.

src/tools/fdsdump/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
33
# Subdirectories with components
44
add_subdirectory(common)
55
add_subdirectory(lister)
6+
add_subdirectory(aggregator)
67

78
add_executable(fdsdump
89
main.cpp
910
options.cpp
1011
$<TARGET_OBJECTS:common_obj>
1112
$<TARGET_OBJECTS:lister_obj>
13+
$<TARGET_OBJECTS:aggregator_obj>
1214
)
1315

1416
target_include_directories(fdsdump
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Create an aggregator "object" library
2+
set(AGGREGATOR_SRC
3+
aggregator.cpp
4+
arenaAllocator.cpp
5+
hashTable.cpp
6+
)
7+
8+
add_library(aggregator_obj OBJECT ${AGGREGATOR_SRC})
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
#include "aggregator.hpp"
3+
4+
void
5+
mode_aggregate(const shared_iemgr &iemgr, const Options &opts)
6+
{
7+
8+
}
9+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
#pragma once
3+
4+
#include <options.hpp>
5+
#include <common/flowProvider.hpp>
6+
7+
void
8+
mode_aggregate(const shared_iemgr &iemgr, const Options &opts);
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @file
3+
* @author Michal Sedlak <[email protected]>
4+
* @brief Simple arena allocator
5+
*/
6+
#include "arenaAllocator.hpp"
7+
8+
#include <cassert>
9+
10+
uint8_t *
11+
ArenaAllocator::allocate(size_t size)
12+
{
13+
assert(size <= BLOCK_SIZE);
14+
15+
if (BLOCK_SIZE - m_offset < size) {
16+
m_offset = 0;
17+
m_blocks.push_back(std::unique_ptr<uint8_t []>(new uint8_t[BLOCK_SIZE]));
18+
}
19+
20+
uint8_t *mem = m_blocks.back().get() + m_offset;
21+
m_offset += size;
22+
return mem;
23+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/**
2+
* @file
3+
* @author Michal Sedlak <[email protected]>
4+
* @brief Simple arena allocator
5+
*/
6+
#pragma once
7+
8+
#include <memory>
9+
#include <vector>
10+
11+
static constexpr size_t BLOCK_SIZE = 4 * 1024 * 1024;
12+
13+
/**
14+
* @brief A simple arena allocator.
15+
*
16+
* Provides easy allocation in continuous memory areas and the ability to deallocate all allocated
17+
* memory upon destruction.
18+
*/
19+
class ArenaAllocator {
20+
public:
21+
ArenaAllocator() {}
22+
23+
/** @brief Disallow copy constructor. */
24+
ArenaAllocator(const ArenaAllocator &) = delete;
25+
/** @brief Disallow move constructor. */
26+
ArenaAllocator(ArenaAllocator &&) = delete;
27+
28+
/**
29+
* @brief Allocate bytes.
30+
* @param[in] size The number of bytes
31+
* @warning size cannot be more than BLOCK_SIZE!
32+
* @return Pointer to the bytes
33+
*/
34+
uint8_t *allocate(size_t size);
35+
36+
private:
37+
std::vector<std::unique_ptr<uint8_t []>> m_blocks;
38+
size_t m_offset = BLOCK_SIZE;
39+
};
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/**
2+
* \file
3+
* \author Michal Sedlak <[email protected]>
4+
* \brief Generic binary heap
5+
*/
6+
#pragma once
7+
8+
#include <vector>
9+
#include <algorithm>
10+
11+
/**
 * @brief A generic binary heap implementation. It can be used for example
 *   as a priority queue.
 *
 * The heap keeps the invariant that `compare(parent, child)` is false for every
 * parent/child pair. With a "less than" comparator the greatest item is
 * therefore at the top.
 *
 * @tparam ItemType   The item type (must be copyable)
 * @tparam CompareFn  The comparator function type, callable as
 *   `bool(const ItemType &, const ItemType &)`
 */
template <typename ItemType, typename CompareFn>
class BinaryHeap {
public:
    /**
     * @brief Constructs a new, empty instance.
     * @param[in] compare  The comparator
     */
    BinaryHeap(CompareFn compare) : m_compare(compare) {}

    /**
     * @brief Push an item onto the heap.
     * @param[in] item  The item
     */
    void
    push(ItemType item)
    {
        m_items.push_back(item);
        sift_up(m_items.size() - 1);
    }

    /**
     * @brief Push an item onto the heap and also pop one off.
     *
     * Behaves like push() followed by pop(), but without growing the heap.
     * On an empty heap the pushed item is handed straight back.
     *
     * @param[in] item  The item to push
     * @return The popped-off item
     */
    ItemType
    push_pop(ItemType item)
    {
        // The new item would end up at the top (or the heap is empty), so it is
        // also the one that would be popped right away; the heap stays untouched.
        if (m_items.empty() || !m_compare(item, m_items[0])) {
            return item;
        }

        // Otherwise the current top leaves and the new item takes its place.
        ItemType result = m_items[0];
        m_items[0] = item;
        sift_down(0);
        return result;
    }

    /**
     * @brief Pop an item off the top of the heap.
     * @warning The heap must not be empty.
     * @return The item
     */
    ItemType
    pop()
    {
        ItemType result = m_items.front();
        if (m_items.size() > 1) {
            // Move the last item to the root; skipped for a single item to avoid
            // assigning the root to itself
            m_items.front() = m_items.back();
        }
        m_items.pop_back();
        sift_down(0);
        return result;
    }

    /**
     * @brief Get the item at the top of the heap.
     * @warning The heap must not be empty.
     * @return The item
     */
    ItemType top() const { return m_items[0]; }

    /**
     * @brief Get the size of the heap
     * @return The number of items the heap holds
     */
    size_t size() const { return m_items.size(); }

private:
    std::vector<ItemType> m_items;  ///< Heap storage, children of i are at 2i+1 and 2i+2
    CompareFn m_compare;            ///< The ordering predicate

    /**
     * @brief Move the item at the given index towards the root until its
     *   parent no longer compares "before" it.
     */
    void
    sift_up(size_t idx)
    {
        while (idx > 0) {
            size_t parent = (idx - 1) / 2;
            if (!m_compare(m_items[parent], m_items[idx])) {
                break;
            }
            std::swap(m_items[parent], m_items[idx]);
            idx = parent;
        }
    }

    /**
     * @brief Move the item at the given index towards the leaves until
     *   neither child belongs above it. Does nothing on an empty heap.
     */
    void
    sift_down(size_t idx)
    {
        for (;;) {
            size_t left = 2 * idx + 1;
            size_t right = 2 * idx + 2;
            size_t candidate = idx;  // The node that should be on top among idx and its children

            if (left < m_items.size() && m_compare(m_items[candidate], m_items[left])) {
                candidate = left;
            }
            if (right < m_items.size() && m_compare(m_items[candidate], m_items[right])) {
                candidate = right;
            }

            if (candidate == idx) {
                break;
            }

            std::swap(m_items[candidate], m_items[idx]);
            idx = candidate;
        }
    }
};
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/**
2+
* @file
3+
* @author Michal Sedlak <[email protected]>
4+
* @brief Efficient hash table implementation
5+
*/
6+
7+
#define XXH_INLINE_ALL
8+
9+
#include <xmmintrin.h>
10+
11+
#include "hashTable.hpp"
12+
#include "3rd_party/xxhash/xxhash.h"
13+
14+
static constexpr double EXPAND_WHEN_THIS_FULL = 0.95;
15+
static constexpr unsigned int EXPAND_WITH_FACTOR_OF = 2;
16+
static constexpr uint8_t EMPTY_BIT = 0x80;
17+
18+
/**
 * @brief Construct an empty hash table and set up its initial blocks.
 * @param[in] key_size    Number of key bytes stored at the start of each record
 * @param[in] value_size  Number of value bytes stored in each record after the key
 */
HashTable::HashTable(std::size_t key_size, std::size_t value_size)
    : m_key_size(key_size)
    , m_value_size(value_size)
{
    init_blocks();
}
23+
24+
void
25+
HashTable::init_blocks()
26+
{
27+
HashTableBlock zeroed_block;
28+
29+
memset(&zeroed_block, 0, sizeof(zeroed_block));
30+
for (int i = 0; i < 16; i++) {
31+
zeroed_block.tags[i] |= EMPTY_BIT; // Indicate that the spot is empty
32+
}
33+
34+
m_blocks.resize(m_block_count);
35+
for (auto &block : m_blocks) {
36+
block = zeroed_block;
37+
}
38+
}
39+
40+
bool
41+
HashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
42+
{
43+
uint64_t hash = XXH3_64bits(key, m_key_size); // The hash of the key
44+
uint64_t index = (hash >> 7) & (m_block_count - 1); // The starting block index
45+
46+
for (;;) {
47+
HashTableBlock &block = m_blocks[index];
48+
49+
uint8_t item_tag = (hash & 0xFF) & ~EMPTY_BIT; // Get item tag from part of the hash with the empty bit cleared
50+
auto block_tags = _mm_load_si128(reinterpret_cast<__m128i *>(block.tags)); // Load the current block metadata (16B)
51+
auto hash_mask = _mm_set1_epi8(item_tag); // Repeat the item tag 16 times
52+
auto empty_mask = _mm_set1_epi8(EMPTY_BIT); // Repeat the empty tag 16 times
53+
54+
auto hash_match = _mm_movemask_epi8(_mm_cmpeq_epi8(block_tags, hash_mask)); // Check if any of the metadata matched our item tag
55+
auto empty_match = _mm_movemask_epi8(_mm_cmpeq_epi8(block_tags, empty_mask)); // Check if any of the metadata matched an empty spot
56+
57+
int item_index = 0;
58+
59+
while (hash_match) { // While there are any set bits indicating that the tag of the item we're looking for matched
60+
auto one_index = __builtin_ctz(hash_match); // Get index of the set bit, i.e. the index of the item in the block
61+
item_index += one_index;
62+
63+
uint8_t *record = block.items[item_index]; // The record whose item tag matched
64+
if (memcmp(record, key, m_key_size) == 0) { // Does the key match as well or was it just a hash collision?
65+
item = record;
66+
return true; // We found the item
67+
}
68+
69+
// Move on to the next set bit
70+
hash_match >>= one_index + 1;
71+
item_index += 1;
72+
}
73+
74+
// If we got here we didn't match, but we found an empty spot in the block which
75+
// indicates that we're done with the search. The item cannot be in the next block
76+
// if the current block contains an empty spot
77+
if (empty_match) {
78+
79+
if (!create_if_not_found) {
80+
// If we're just looking for the item and we haven't found it, we're done
81+
return false;
82+
}
83+
84+
// Create a new record
85+
auto empty_index = __builtin_ctz(empty_match);
86+
block.tags[empty_index] = item_tag;
87+
88+
uint8_t *record = m_allocator.allocate(m_key_size + m_value_size);
89+
block.items[empty_index] = record;
90+
m_items.push_back(record);
91+
m_record_count++;
92+
93+
memcpy(record, key, m_key_size); // Copy the key, leave the value part uninitialized
94+
item = record;
95+
96+
// If the hash table has reached a specified percentage of fullness, expand the hash table
97+
if (double(m_record_count) / (16 * double(m_block_count)) >= EXPAND_WHEN_THIS_FULL) {
98+
expand();
99+
}
100+
101+
return false;
102+
}
103+
104+
index = (index + 1) & (m_block_count - 1); // Move on to the next block
105+
}
106+
}
107+
108+
void
109+
HashTable::expand()
110+
{
111+
// Grow the amount of blocks by a specified factor
112+
m_block_count *= EXPAND_WITH_FACTOR_OF;
113+
114+
// Reinitialize the blocks
115+
init_blocks();
116+
117+
// Reassign all the items to the newly initialized blocks
118+
for (uint8_t *item : m_items) {
119+
uint64_t hash = XXH3_64bits(item, m_key_size);
120+
uint64_t index = (hash >> 7) & (m_block_count - 1);
121+
uint8_t item_tag = (hash & 0xFF) & ~EMPTY_BIT;
122+
123+
// Find a spot for the item and insert it
124+
for (;;) {
125+
HashTableBlock &block = m_blocks[index];
126+
127+
auto block_tags = _mm_load_si128(reinterpret_cast<__m128i *>(block.tags));
128+
auto empty_mask = _mm_set1_epi8(EMPTY_BIT);
129+
auto empty_match = _mm_movemask_epi8(_mm_cmpeq_epi8(block_tags, empty_mask));
130+
if (empty_match) { // Does this black have an empty spot for our item?
131+
auto empty_index = __builtin_ctz(empty_match);
132+
block.tags[empty_index] = item_tag;
133+
block.items[empty_index] = item;
134+
break;
135+
}
136+
137+
index = (index + 1) & (m_block_count - 1);
138+
}
139+
140+
}
141+
}
142+
143+
bool
144+
HashTable::find(uint8_t *key, uint8_t *&item)
145+
{
146+
return lookup(key, item, false);
147+
}
148+
149+
bool
150+
HashTable::find_or_create(uint8_t *key, uint8_t *&item)
151+
{
152+
return lookup(key, item, true);
153+
}

0 commit comments

Comments
 (0)