Skip to content

Commit 8b45425

Browse files
Merge pull request #1 from lemire/master
Simplifying the code, removing unnecessary (obsolete) dependencies.
2 parents 8c7c8b3 + f431ccc commit 8b45425

17 files changed

+90
-1008
lines changed

README.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,26 @@
11
# xorfilter_cpp
2-
Bloom filter alternative (C++)
2+
Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters (C++)
3+
4+
## Prerequisites
5+
6+
- A C++11 compiler such as GNU G++ or LLVM Clang++
7+
- Make
8+
9+
10+
11+
## Usage
12+
13+
```
14+
cd benchmarks
15+
make
16+
./bulk-insert-and-query.exe 10000000
17+
```
18+
19+
20+
## Where is your code?
21+
22+
See src/xorfilter.h. This single header depends on src/hashutil.h.
23+
24+
## Credit
25+
26+
The code is derived from https://github.com/efficient/cuckoofilter by Bin Fan et al.

benchmarks/Makefile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
OPT = -O3 -DNDEBUG
33
#OPT = -g -ggdb
44

5-
CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ $(OPT) -march=core-avx2
5+
CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ $(OPT) -march=native
66

7-
LDFLAGS+= -Wall -lpthread -lssl -lcrypto
7+
LDFLAGS = -Wall
88

99
HEADERS = $(wildcard ../src/*.h) *.h
1010

11-
SRC = ../src/hashutil.cc
11+
1212

1313
.PHONY: all
1414

@@ -19,5 +19,5 @@ all: $(BINS)
1919
clean:
2020
/bin/rm -f $(BINS)
2121

22-
%.exe: %.cc ${HEADERS} ${SRC} Makefile
23-
$(CXX) $(CXXFLAGS) $< -o $@ $(SRC) $(LDFLAGS)
22+
%.exe: %.cc ${HEADERS} Makefile
23+
$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)

benchmarks/bulk-insert-and-query.cc

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
#endif
4242

4343
using namespace std;
44-
44+
using namespace hashing;
4545
using namespace cuckoofilter;
4646
using namespace xorfilter;
4747
using namespace xorfilter2;
@@ -66,6 +66,15 @@ struct Statistics {
6666
double bits_per_item;
6767
};
6868

69+
//
70+
// Inlining the "contains" which are executed within a tight loop can be either
71+
// very detrimental or very beneficial, and which way it goes depends on the
72+
// compiler. It is unclear whether we want to benchmark the inlining of Contains,
73+
// as it depends very much on how "contains" is used. So it may be reasonable
74+
// to benchmark it without inlining.
75+
//
76+
#define CONTAIN_ATTRIBUTES __attribute__ ((noinline))
77+
6978
// Output for the first row of the table of results. type_width is the maximum number of
7079
// characters of the description of any table type, and find_percent_count is the number
7180
// of different lookup statistics gathered for each table. This function assumes the
@@ -133,6 +142,8 @@ struct FilterAPI<CuckooFilter<ItemType, bits_per_item, TableType, HashFamily>> {
133142
}
134143
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
135144
}
145+
146+
CONTAIN_ATTRIBUTES
136147
static bool Contain(uint64_t key, const Table * table) {
137148
return (0 == table->Contain(key));
138149
}
@@ -167,6 +178,8 @@ struct FilterAPI<SimdBlockFilter<HashFamily>> {
167178
}
168179
static void AddAll(const vector<uint64_t> keys, const size_t start, const size_t end, Table* table) {
169180
}
181+
182+
CONTAIN_ATTRIBUTES
170183
static bool Contain(uint64_t key, const Table * table) {
171184
return table->Find(key);
172185
}
@@ -184,6 +197,8 @@ struct FilterAPI<SimdBlockFilterFixed<HashFamily>> {
184197
}
185198
static void AddAll(const vector<uint64_t> keys, const size_t start, const size_t end, Table* table) {
186199
}
200+
201+
CONTAIN_ATTRIBUTES
187202
static bool Contain(uint64_t key, const Table * table) {
188203
return table->Find(key);
189204
}
@@ -200,6 +215,8 @@ struct FilterAPI<XorFilter<ItemType, FingerprintType>> {
200215
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
201216
table->AddAll(keys, start, end);
202217
}
218+
219+
CONTAIN_ATTRIBUTES
203220
static bool Contain(uint64_t key, const Table * table) {
204221
return (0 == table->Contain(key));
205222
}
@@ -214,6 +231,8 @@ struct FilterAPI<XorFilter<ItemType, FingerprintType, HashFamily>> {
214231
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
215232
table->AddAll(keys, start, end);
216233
}
234+
235+
CONTAIN_ATTRIBUTES
217236
static bool Contain(uint64_t key, const Table * table) {
218237
return (0 == table->Contain(key));
219238
}
@@ -228,6 +247,8 @@ struct FilterAPI<XorFilter2<ItemType, FingerprintType, FingerprintStorageType, H
228247
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
229248
table->AddAll(keys, start, end);
230249
}
250+
251+
CONTAIN_ATTRIBUTES
231252
static bool Contain(uint64_t key, const Table * table) {
232253
return (0 == table->Contain(key));
233254
}
@@ -242,6 +263,8 @@ struct FilterAPI<XorFilter2n<ItemType, FingerprintType, FingerprintStorageType,
242263
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
243264
table->AddAll(keys, start, end);
244265
}
266+
267+
CONTAIN_ATTRIBUTES
245268
static bool Contain(uint64_t key, const Table * table) {
246269
return (0 == table->Contain(key));
247270
}
@@ -256,6 +279,8 @@ struct FilterAPI<XorFilterPlus<ItemType, FingerprintType, HashFamily>> {
256279
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
257280
table->AddAll(keys, start, end);
258281
}
282+
283+
CONTAIN_ATTRIBUTES
259284
static bool Contain(uint64_t key, const Table * table) {
260285
return (0 == table->Contain(key));
261286
}
@@ -270,6 +295,8 @@ struct FilterAPI<GcsFilter<ItemType, bits_per_item, HashFamily>> {
270295
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
271296
table->AddAll(keys, start, end);
272297
}
298+
299+
CONTAIN_ATTRIBUTES
273300
static bool Contain(uint64_t key, const Table * table) {
274301
return (0 == table->Contain(key));
275302
}
@@ -285,6 +312,8 @@ struct FilterAPI<GQFilter<ItemType, bits_per_item, HashFamily>> {
285312
}
286313
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
287314
}
315+
316+
CONTAIN_ATTRIBUTES
288317
static bool Contain(uint64_t key, const Table * table) {
289318
return (0 == table->Contain(key));
290319
}
@@ -300,6 +329,8 @@ struct FilterAPI<BloomFilter<ItemType, bits_per_item, HashFamily>> {
300329
}
301330
static void AddAll(const vector<ItemType> keys, const size_t start, const size_t end, Table* table) {
302331
}
332+
333+
CONTAIN_ATTRIBUTES
303334
static bool Contain(uint64_t key, const Table * table) {
304335
return (0 == table->Contain(key));
305336
}

src/bloom.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
#include <assert.h>
55
#include <algorithm>
66

7-
#include "debug.h"
87
#include "hashutil.h"
9-
#include "printutil.h"
108

119
using namespace std;
12-
using namespace cuckoofilter;
10+
using namespace hashing;
1311

1412
namespace bloomfilter {
1513
// status returned by a Bloom filter operation

src/cuckoofilter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ const size_t kMaxCuckooCount = 500;
3131
// PackedTable to enable semi-sorting
3232
template <typename ItemType, size_t bits_per_item,
3333
template <size_t> class TableType = SingleTable,
34-
typename HashFamily = TwoIndependentMultiplyShift>
34+
typename HashFamily = hashing::TwoIndependentMultiplyShift>
3535
class CuckooFilter {
3636
// Storage of items
3737
TableType<bits_per_item> *table_;

src/cuckoofilter_stable.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ inline uint32_t reduce(uint64_t hash, uint32_t n) {
2626
// PackedTable to enable semi-sorting
2727
template <typename ItemType, size_t bits_per_item,
2828
template <size_t> class TableType = SingleTable,
29-
typename HashFamily = TwoIndependentMultiplyShift>
29+
typename HashFamily = hashing::TwoIndependentMultiplyShift>
3030
class CuckooFilterStable {
3131
// Storage of items
3232
TableType<bits_per_item> *table_;

src/gcs.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
#include <assert.h>
55
#include <algorithm>
66

7-
#include "debug.h"
87
#include "hashutil.h"
9-
#include "printutil.h"
108

119
using namespace std;
12-
using namespace cuckoofilter;
10+
using namespace hashing;
1311

1412
namespace gcsfilter {
1513
// status returned by a gcs filter operation

src/gqf_cpp.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
#include <assert.h>
55
#include <algorithm>
66

7-
#include "debug.h"
87
#include "hashutil.h"
9-
#include "printutil.h"
108

119
#include "gqf_hashutil.h"
1210
#include "gqf_hashutil.c"
@@ -15,7 +13,7 @@
1513
#include "gqf.c"
1614

1715
using namespace std;
18-
using namespace cuckoofilter;
16+
using namespace hashing;
1917

2018
namespace gqfilter {
2119
// status returned by a GQ filter operation

0 commit comments

Comments
 (0)