Skip to content

Commit cb64259

Browse files
committed
tuning the wording and adding a spaceusage benchmark
1 parent 5e2aff2 commit cb64259

File tree

4 files changed

+130
-7
lines changed

4 files changed

+130
-7
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,9 @@ For serialization, there is a choice between an unpacked and a packed format.
5959
The unpacked format is roughly of the same size as in-core data, but uses most
6060
efficient memory copy operations.
6161
62-
The packed format avoids storing zero bytes and is considered near optimal (it
63-
can not be compressed further by zlib and its required space is very close to
64-
the theoretical lower limit), but it needs to copy individual words, so it
65-
should be expected to be somewhat slower.
62+
The packed format avoids storing zero bytes and relies on a bitset to locate them, so it
63+
should be expected to be somewhat slower. The packed format might be smaller or larger than the unpacked format.
64+
When in doubt, prefer the regular (unpacked) format.
6665
6766
The two formats use slightly different APIs.
6867
@@ -77,11 +76,13 @@ You may serialize and deserialize in unpacked format as follows:
7776
free(buffer);
7877
```
7978

79+
The unpacked format should be your default choice.
80+
8081
To serialize and deserialize in packed format, use the `_pack_bytes()`,
8182
`_pack()` and `_unpack()` functions. The latter two have an additional `size_t`
8283
argument for the buffer length. `_pack()` can be used with a buffer of arbitrary
8384
size; it returns the used space if the serialization fits into the buffer, or 0
84-
otherwise.
85+
otherwise. Note that the packed format will be slower and may not save space.
8586

8687
For example:
8788

benchmarks/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
add_executable(bench bench.c)
22
target_link_libraries(bench PUBLIC xor_singleheader)
3+
4+
add_executable(spaceusage spaceusage.c)
5+
target_link_libraries(spaceusage PUBLIC xor_singleheader)

benchmarks/spaceusage.c

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#include "binaryfusefilter.h"
2+
#include "xorfilter.h"
3+
#include <stdlib.h>
4+
#include <iso646.h>
5+
6+
/* Pair of serialized sizes, in bytes, measured for one filter instance. */
typedef struct {
7+
/* Value returned by the filter's *_serialization_bytes() function. */
size_t standard;
8+
/* Value returned by the filter's *_pack_bytes() function. */
size_t pack;
9+
} sizes;
10+
11+
sizes fuse16(size_t n) {
12+
binary_fuse16_t filter = {0};
13+
if (! binary_fuse16_allocate(n, &filter)) {
14+
printf("allocation failed\n");
15+
return (sizes) {0, 0};
16+
}
17+
uint64_t* big_set = malloc(n * sizeof(uint64_t));
18+
for(size_t i = 0; i < n; i++) {
19+
big_set[i] = i;
20+
}
21+
bool is_ok = binary_fuse16_populate(big_set, n, &filter);
22+
if(! is_ok ) {
23+
printf("populating failed\n");
24+
}
25+
free(big_set);
26+
sizes s = {
27+
.standard = binary_fuse16_serialization_bytes(&filter),
28+
.pack = binary_fuse16_pack_bytes(&filter)
29+
};
30+
binary_fuse16_free(&filter);
31+
return s;
32+
}
33+
34+
sizes fuse8(size_t n) {
35+
binary_fuse8_t filter = {0};
36+
if (! binary_fuse8_allocate(n, &filter)) {
37+
printf("allocation failed\n");
38+
return (sizes) {0, 0};
39+
}
40+
uint64_t* big_set = malloc(n * sizeof(uint64_t));
41+
for(size_t i = 0; i < n; i++) {
42+
big_set[i] = i;
43+
}
44+
bool is_ok = binary_fuse8_populate(big_set, n, &filter);
45+
if(! is_ok ) {
46+
printf("populating failed\n");
47+
}
48+
free(big_set);
49+
sizes s = {
50+
.standard = binary_fuse8_serialization_bytes(&filter),
51+
.pack = binary_fuse8_pack_bytes(&filter)
52+
};
53+
binary_fuse8_free(&filter);
54+
return s;
55+
}
56+
57+
sizes xor16(size_t n) {
58+
xor16_t filter = {0};
59+
if (! xor16_allocate(n, &filter)) {
60+
printf("allocation failed\n");
61+
return (sizes) {0, 0};
62+
}
63+
uint64_t* big_set = malloc(n * sizeof(uint64_t));
64+
for(size_t i = 0; i < n; i++) {
65+
big_set[i] = i;
66+
}
67+
bool is_ok = xor16_populate(big_set, n, &filter);
68+
if(! is_ok ) {
69+
printf("populating failed\n");
70+
}
71+
free(big_set);
72+
sizes s = {
73+
.standard = xor16_serialization_bytes(&filter),
74+
.pack = xor16_pack_bytes(&filter)
75+
};
76+
xor16_free(&filter);
77+
return s;
78+
}
79+
80+
sizes xor8(size_t n) {
81+
xor8_t filter = {0};
82+
if (! xor8_allocate(n, &filter)) {
83+
printf("allocation failed\n");
84+
return (sizes) {0, 0};
85+
}
86+
uint64_t* big_set = malloc(n * sizeof(uint64_t));
87+
for(size_t i = 0; i < n; i++) {
88+
big_set[i] = i;
89+
}
90+
bool is_ok = xor8_populate(big_set, n, &filter);
91+
if(! is_ok ) {
92+
printf("populating failed\n");
93+
}
94+
free(big_set);
95+
sizes s = {
96+
.standard = xor8_serialization_bytes(&filter),
97+
.pack = xor8_pack_bytes(&filter)
98+
};
99+
xor8_free(&filter);
100+
101+
return s;
102+
}
103+
104+
int main() {
105+
for (size_t n = 10; n <= 10000000; n *= 2) {
106+
printf("%-10zu ", n); // Align number to 10 characters wide
107+
sizes f16 = fuse16(n);
108+
sizes f8 = fuse8(n);
109+
sizes x16 = xor16(n);
110+
sizes x8 = xor8(n);
111+
112+
printf("fuse16: %5.2f %5.2f ", (double)f16.standard * 8.0 / n, (double)f16.pack * 8.0 / n);
113+
printf("fuse8: %5.2f %5.2f ", (double)f8.standard * 8.0 / n, (double)f8.pack * 8.0 / n);
114+
printf("xor16: %5.2f %5.2f ", (double)x16.standard * 8.0 / n, (double)x16.pack * 8.0 / n);
115+
printf("xor8: %5.2f %5.2f ", (double)x8.standard * 8.0 / n, (double)x8.pack * 8.0 / n);
116+
printf("\n");
117+
}
118+
return EXIT_SUCCESS;
119+
}

tests/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ else() # *nix
1414
-Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion -Werror)
1515

1616
if (NOT MINGW) # sanitizers are not supported under mingw
17-
list(APPEND TEST_COMPILE_OPTIONS -fsanitize=address,undefined,leak)
17+
list(APPEND TEST_COMPILE_OPTIONS -fsanitize=address,undefined)
1818
# sanitizers need to be specified at link time as well
19-
target_link_options(unit PRIVATE -fsanitize=address,leak,undefined)
19+
target_link_options(unit PRIVATE -fsanitize=address,undefined)
2020
endif()
2121
endif()
2222

0 commit comments

Comments
 (0)