Skip to content

Commit b7c969e

Browse files
committed
Add VQF
1 parent 541ad8f commit b7c969e

File tree

8 files changed

+1212
-1
lines changed

8 files changed

+1212
-1
lines changed

benchmarks/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ OPT = -O3 -DNDEBUG -std=c++17
77
CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ \
88
-I../src/bloom/ -I../src/cuckoo/ -I../src/gcs \
99
-I../src/gqf/ -I../src/morton/ -I../src/xorfilter -I../src/ribbon \
10+
-I../src/vqf \
1011
$(OPT)
1112

1213
UNAME_P := $(shell uname -p)
@@ -20,6 +21,7 @@ LDFLAGS = -Wall -Wextra
2021
HEADERS = $(wildcard ../src/*.h \
2122
../src/bloom/*.h ../src/cuckoo/*.h ../src/gcs/*.h \
2223
../src/gqf/*.h ../src/morton/*.h ../src/xorfilter/*.h ../src/ribbon/*.h \
24+
../src/vqf/*.h \
2325
) *.h
2426

2527
.PHONY: all

benchmarks/bulk-insert-and-query.cc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,8 +356,9 @@ int main(int argc, char * argv[]) {
356356
// GCS
357357
{20,"GCS"},
358358
#ifdef __AVX2__
359-
// CQF
359+
// CQF + VQF
360360
{30,"CQF"},
361+
{31,"VQF"},
361362
#endif
362363
// Bloom
363364
{40, "Bloom8"}, {41, "Bloom12" }, {42, "Bloom16"},
@@ -729,6 +730,13 @@ int main(int argc, char * argv[]) {
729730
add_count, to_add, intersectionsize, mixed_sets, false, true);
730731
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
731732
}
733+
a = 31;
734+
if (algorithmId == a || (algos.find(a) != algos.end())) {
735+
auto cf = FilterBenchmark<
736+
VQFilter<uint64_t, SimpleMixSplit>>(
737+
add_count, to_add, intersectionsize, mixed_sets, true, false);
738+
cout << setw(NAME_WIDTH) << names[a] << cf << endl;
739+
}
732740
#endif
733741

734742
// Bloom ----------------------------------------------------------

benchmarks/filterapi.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "gcs.h"
2525
#ifdef __AVX2__
2626
#include "gqf_cpp.h"
27+
#include "vqf_cpp.h"
2728
#include "simd-block.h"
2829
#endif
2930
#include "simd-block-fixed-fpp.h"
@@ -41,6 +42,7 @@ using namespace gcsfilter;
4142
using namespace CompressedCuckoo; // Morton filter namespace
4243
#ifdef __AVX2__
4344
using namespace gqfilter;
45+
using namespace vqfilter;
4446
#endif
4547
using namespace ribbon;
4648

@@ -853,6 +855,27 @@ struct FilterAPI<GQFilter<ItemType, bits_per_item, HashFamily>> {
853855
return (0 == table->Contain(key));
854856
}
855857
};
858+
859+
template <typename ItemType, typename HashFamily>
860+
struct FilterAPI<VQFilter<ItemType, HashFamily>> {
861+
using Table = VQFilter<ItemType, HashFamily>;
862+
static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
863+
static void Add(uint64_t key, Table* table) {
864+
table->Add(key);
865+
}
866+
static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
867+
table->AddAll(keys, start, end);
868+
// for(size_t i = start; i < end; i++) { Add(keys[i],table); }
869+
}
870+
static void Remove(uint64_t, Table *) {
871+
throw std::runtime_error("Unsupported");
872+
// sometimes fails with the original VQF implementation
873+
// table->Remove(key);
874+
}
875+
CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, const Table * table) {
876+
return (0 == table->Contain(key));
877+
}
878+
};
856879
#endif
857880

858881
template <typename ItemType, size_t bits_per_item, bool branchless, typename HashFamily>

src/vqf/vqf_cpp.h

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#ifndef VQ_FILTER_VQ_FILTER_H_
2+
#define VQ_FILTER_VQ_FILTER_H_
3+
4+
#include <assert.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "hashutil.h"

#include "vqf_filter.h"
#include "vqf_filter.c"
11+
12+
using namespace std;
13+
using namespace hashing;
14+
15+
namespace vqfilter {
16+
// Result codes reported by VQFilter operations. The numeric values are part
// of the contract: callers compare the result against 0 to test for success.
enum Status {
  Ok = 0,          // operation succeeded / item reported present
  NotFound,        // == 1: item not present
  NotEnoughSpace,  // == 2: filter could not take the item
  NotSupported,    // == 3: operation is not implemented
};
23+
24+
template <typename ItemType, typename HashFamily = SimpleMixSplit>
25+
class VQFilter {
26+
27+
vqf_filter *filter;
28+
uint64_t bytesUsed;
29+
uint64_t range;
30+
double bitsPerItem;
31+
HashFamily hasher;
32+
33+
double BitsPerItem() const { return bitsPerItem; }
34+
35+
void ApplyBlock(uint64_t *tmp, int block, int len);
36+
37+
public:
38+
explicit VQFilter(const size_t n) : hasher() {
39+
40+
// when inserting in random order
41+
// uint64_t nslots = (uint64_t) (n / 0.94);
42+
// when inserting in sorted order
43+
uint64_t nslots = (uint64_t) (n / 0.89);
44+
45+
if ((filter = vqf_init(nslots)) == NULL) {
46+
std::cout << "Can't allocate.\n";
47+
abort();
48+
}
49+
range = filter->metadata.range;
50+
bytesUsed = filter->metadata.total_size_in_bytes;
51+
bitsPerItem = (double) bytesUsed / n;
52+
53+
}
54+
55+
~VQFilter() {
56+
free(filter);
57+
}
58+
59+
// Add an item to the filter.
60+
Status Add(const ItemType &item);
61+
62+
Status AddAll(const vector<ItemType> &data, const size_t start, const size_t end) {
63+
return AddAll(data.data(), start, end);
64+
}
65+
66+
// Add an item to the filter.
67+
Status AddAll(const ItemType *data, const size_t start, const size_t end);
68+
69+
// Report if the item is inserted, with false positive rate.
70+
Status Contain(const ItemType &item) const;
71+
72+
/* methods for providing stats */
73+
// summary infomation
74+
std::string Info() const;
75+
76+
// number of current inserted items;
77+
size_t Size() const { return 0; }
78+
79+
// size of the filter in bytes.
80+
size_t SizeInBytes() const { return bytesUsed; }
81+
};
82+
83+
template <typename ItemType, typename HashFamily>
84+
Status VQFilter<ItemType, HashFamily>::Add(
85+
const ItemType &key) {
86+
uint64_t hash = hasher(key);
87+
bool ret = vqf_insert(filter, hash);
88+
if (!ret) {
89+
std::cout << "failed insertion for key.\n";
90+
abort();
91+
}
92+
return Ok;
93+
}
94+
95+
template <typename ItemType, typename HashFamily>
96+
Status VQFilter<ItemType, HashFamily>::Contain(
97+
const ItemType &key) const {
98+
uint64_t hash = hasher(key);
99+
bool ret = vqf_is_present(filter, hash);
100+
return ret ? Ok : NotFound;
101+
}
102+
103+
// Bulk insertion buffers hashes in fixed-size buckets before inserting them.
// blockShift is log2 of the bucket capacity; blockLen is that capacity.
constexpr int blockShift = 15;
constexpr int blockLen = 1 << blockShift;
105+
106+
template <typename ItemType, typename HashFamily>
107+
void VQFilter<ItemType, HashFamily>::ApplyBlock(uint64_t *tmp, int block, int len) {
108+
// std::cout << "addAll ApplyBlock block " << block << " len " << len << "\n";
109+
for (int i = 0; i < len; i++) {
110+
uint64_t hash = tmp[(block << blockShift) + i];
111+
// std::cout << "inserting " << hash << "\n";
112+
bool ret = vqf_insert(filter, hash);
113+
if (!ret) {
114+
std::cout << "failed insertion for key.\n";
115+
abort();
116+
}
117+
}
118+
}
119+
120+
template <typename ItemType, typename HashFamily>
121+
Status VQFilter<ItemType, HashFamily>::AddAll(
122+
const ItemType* keys, const size_t start, const size_t end) {
123+
/*
124+
for (size_t i = start; i < end; i++) {
125+
uint64_t key = keys[i];
126+
uint64_t hash = hasher(key);
127+
std::cout << "adding " << hash << "\n";
128+
}
129+
*/
130+
int blocks = 1 + (end - start) / blockLen;
131+
uint64_t *tmp = new uint64_t[blocks * blockLen];
132+
int *tmpLen = new int[blocks]();
133+
// std::cout << "addAll blocks " << blocks << "\n";
134+
for (size_t i = start; i < end; i++) {
135+
uint64_t key = keys[i];
136+
uint64_t hash = hasher(key);
137+
// __uint128_t x = (__uint128_t)key * (__uint128_t)blocks;
138+
__uint128_t x = (__uint128_t)hash * (__uint128_t)blocks;
139+
int block = (uint64_t)(x >> 64);
140+
int len = tmpLen[block];
141+
tmp[(block << blockShift) + len] = hash;
142+
tmpLen[block] = len + 1;
143+
if (len + 1 == blockLen) {
144+
ApplyBlock(tmp, block, len + 1);
145+
tmpLen[block] = 0;
146+
}
147+
}
148+
for (int block = 0; block < blocks; block++) {
149+
ApplyBlock(tmp, block, tmpLen[block]);
150+
tmpLen[block] = 0;
151+
}
152+
delete[] tmp;
153+
delete[] tmpLen;
154+
return Ok;
155+
}
156+
157+
template <typename ItemType, typename HashFamily>
158+
std::string VQFilter<ItemType, HashFamily>::Info() const {
159+
std::stringstream ss;
160+
ss << "VQFilter Status:\n"
161+
<< "\t\tKeys stored: " << Size() << "\n";
162+
if (Size() > 0) {
163+
ss << "\t\tk: " << BitsPerItem() << "\n";
164+
} else {
165+
ss << "\t\tk: N/A\n";
166+
}
167+
return ss.str();
168+
}
169+
} // namespace vqfilter
170+
#endif // VQ_FILTER_VQ_FILTER_H_

0 commit comments

Comments
 (0)