Skip to content

Commit 7cca7cf

Browse files
committed
fdsdump: abstract away hash table choice
1 parent bf6ee4c commit 7cca7cf

File tree

4 files changed

+120
-79
lines changed

4 files changed

+120
-79
lines changed

src/tools/fdsdump/src/aggregator/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ set(AGGREGATOR_SRC
33
mode.cpp
44
aggregator.cpp
55
arenaAllocator.cpp
6-
hashTable.cpp
6+
fastHashTable.cpp
77
view.cpp
88
sort.cpp
99
print.cpp

src/tools/fdsdump/src/aggregator/hashTable.cpp renamed to src/tools/fdsdump/src/aggregator/fastHashTable.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
* @brief Efficient hash table implementation
55
*/
66

7+
#ifdef __SSE2__
8+
79
#define XXH_INLINE_ALL
810

911
#include <xmmintrin.h>
1012

11-
#include "hashTable.hpp"
13+
#include "fastHashTable.hpp"
1214
#include "3rd_party/xxhash/xxhash.h"
1315

1416
namespace fdsdump {
@@ -18,14 +20,14 @@ static constexpr double EXPAND_WHEN_THIS_FULL = 0.95;
1820
static constexpr unsigned int EXPAND_WITH_FACTOR_OF = 2;
1921
static constexpr uint8_t EMPTY_BIT = 0x80;
2022

21-
HashTable::HashTable(std::size_t key_size, std::size_t value_size) :
23+
FastHashTable::FastHashTable(std::size_t key_size, std::size_t value_size) :
2224
m_key_size(key_size), m_value_size(value_size)
2325
{
2426
init_blocks();
2527
}
2628

2729
void
28-
HashTable::init_blocks()
30+
FastHashTable::init_blocks()
2931
{
3032
HashTableBlock zeroed_block;
3133

@@ -41,7 +43,7 @@ HashTable::init_blocks()
4143
}
4244

4345
bool
44-
HashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
46+
FastHashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
4547
{
4648
uint64_t hash = XXH3_64bits(key, m_key_size); // The hash of the key
4749
uint64_t index = (hash >> 7) & (m_block_count - 1); // The starting block index
@@ -109,7 +111,7 @@ HashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
109111
}
110112

111113
void
112-
HashTable::expand()
114+
FastHashTable::expand()
113115
{
114116
// Grow the amount of blocks by a specified factor
115117
m_block_count *= EXPAND_WITH_FACTOR_OF;
@@ -144,16 +146,18 @@ HashTable::expand()
144146
}
145147

146148
bool
147-
HashTable::find(uint8_t *key, uint8_t *&item)
149+
FastHashTable::find(uint8_t *key, uint8_t *&item)
148150
{
149151
return lookup(key, item, false);
150152
}
151153

152154
bool
153-
HashTable::find_or_create(uint8_t *key, uint8_t *&item)
155+
FastHashTable::find_or_create(uint8_t *key, uint8_t *&item)
154156
{
155157
return lookup(key, item, true);
156158
}
157159

158160
} // aggregator
159161
} // fdsdump
162+
163+
#endif // ifdef __SSE2__
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
2+
* @file
3+
* @author Michal Sedlak <sedlakm@cesnet.cz>
4+
* @brief Efficient hash table implementation
5+
*/
6+
#pragma once
7+
8+
#ifdef __SSE2__
9+
10+
#include <aggregator/arenaAllocator.hpp>
11+
12+
#include <cstdint>
13+
#include <vector>
14+
15+
namespace fdsdump {
16+
namespace aggregator {
17+
18+
/**
19+
* @brief A struct representing a hash table block.
20+
*/
21+
struct HashTableBlock
22+
{
23+
alignas(16) uint8_t tags[16];
24+
uint8_t *items[16];
25+
};
26+
27+
/**
28+
* @brief An efficient hash table implementation inspired by a family of hash
29+
* tables known as "Swiss tables".
30+
*/
31+
class FastHashTable {
32+
public:
33+
/**
34+
* @brief Constructs a new instance.
35+
* @param[in] key_size Number of bytes of the key portion of the record
36+
* @param[in] value_size Number of bytes of the value portion of the record
37+
*/
38+
FastHashTable(std::size_t key_size, std::size_t value_size);
39+
40+
/**
41+
* @brief Find a record corresponding to the provided key
42+
* @param key The key
43+
* @param item The stored record including the key
44+
* @return true if the record was found, false otherwise
45+
*/
46+
bool
47+
find(uint8_t *key, uint8_t *&item);
48+
49+
/**
50+
* @brief Find a record corresponding to the provided key or create a new
51+
* one if not found.
52+
* @param key The key
53+
* @param item The stored record including the key
54+
* @return true if the record was found, false if it wasn't and a new record
55+
* was created
56+
*/
57+
bool
58+
find_or_create(uint8_t *key, uint8_t *&item);
59+
60+
/**
61+
* @brief Access the stored records.
62+
* @warning
63+
* If the vector is modified by the caller in some way, the behavior of
64+
* following calls to the hash table methods is undefined
65+
* @return Vector of the stored records
66+
*/
67+
std::vector<uint8_t *> &items() { return m_items; }
68+
69+
private:
70+
std::size_t m_block_count = 4096;
71+
std::size_t m_record_count = 0;
72+
std::size_t m_key_size;
73+
std::size_t m_value_size;
74+
75+
std::vector<HashTableBlock> m_blocks;
76+
std::vector<uint8_t *> m_items;
77+
78+
ArenaAllocator m_allocator;
79+
80+
bool
81+
lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found);
82+
83+
void
84+
init_blocks();
85+
86+
void
87+
expand();
88+
};
89+
90+
} // aggregator
91+
} // fdsdump
92+
93+
#endif // ifdef __SSE2__
Lines changed: 15 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,33 @@
11
/**
22
* @file
3-
* @author Michal Sedlak <[email protected].cz>
4-
* @brief Efficient hash table implementation
3+
* @author Michal Sedlak <sedlakm@cesnet.cz>
4+
* @brief Select hash table implementation
55
*/
6+
67
#pragma once
78

8-
#include <cstdint>
9-
#include <vector>
9+
#if defined(__SSE2__) && !defined(FDSDUMP_USE_STD_HASHTABLE)
1010

11-
#include "arenaAllocator.hpp"
11+
#include <aggregator/fastHashTable.hpp>
1212

1313
namespace fdsdump {
1414
namespace aggregator {
1515

16-
/**
17-
* @brief A struct representing a hash table block.
18-
*/
19-
struct HashTableBlock
20-
{
21-
alignas(16) uint8_t tags[16];
22-
uint8_t *items[16];
23-
};
24-
25-
/**
26-
* @brief An efficient hash table implementation inspired by a family of hash
27-
* tables known as "Swiss tables".
28-
*/
29-
class HashTable {
30-
public:
31-
/**
32-
* @brief Constructs a new instance.
33-
* @param[in] key_size Number of bytes of the key portion of the record
34-
* @param[in] value_size Number of bytes of the value portion of the record
35-
*/
36-
HashTable(std::size_t key_size, std::size_t value_size);
37-
38-
/**
39-
* @brief Find a record corresponding to the provided key
40-
* @param key The key
41-
* @param item The stored record including the key
42-
* @return true if the record was found, false otherwise
43-
*/
44-
bool
45-
find(uint8_t *key, uint8_t *&item);
46-
47-
/**
48-
* @brief Find a record corresponding to the provided key or create a new
49-
* one if not found.
50-
* @param key The key
51-
* @param item The stored record including the key
52-
* @return true if the record was found, false if it wasn't and a new record
53-
* was created
54-
*/
55-
bool
56-
find_or_create(uint8_t *key, uint8_t *&item);
16+
using HashTable = FastHashTable;
5717

58-
/**
59-
* @brief Access the stored records.
60-
* @warning
61-
* If the vector is modified bu the caller in some way, the behavior of
62-
* following calls to the hash table methods are undefined
63-
* @return Vector of the stored records
64-
*/
65-
std::vector<uint8_t *> &items() { return m_items; }
66-
67-
private:
68-
std::size_t m_block_count = 4096;
69-
std::size_t m_record_count = 0;
70-
std::size_t m_key_size;
71-
std::size_t m_value_size;
72-
73-
std::vector<HashTableBlock> m_blocks;
74-
std::vector<uint8_t *> m_items;
18+
} // aggregator
19+
} // fdsdump
7520

76-
ArenaAllocator m_allocator;
21+
#else
7722

78-
bool
79-
lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found);
23+
#include <aggregator/stdHashTable.hpp>
8024

81-
void
82-
init_blocks();
25+
namespace fdsdump {
26+
namespace aggregator {
8327

84-
void
85-
expand();
86-
};
28+
using HashTable = StdHashTable;
8729

8830
} // aggregator
8931
} // fdsdump
32+
33+
#endif

0 commit comments

Comments
 (0)