Skip to content

Commit e82e15a

Browse files
authored
Harden bloom filter vector access (stellar#4619)
# Description Resolves stellar#4618 This hardens Binary Fuse Filter constructor code to use protected vector lookups instead of C style array dereferencing. # Checklist - [x] Reviewed the [contributing](https://github.com/stellar/stellar-core/blob/master/CONTRIBUTING.md#submitting-changes) document - [x] Rebased on top of master (no merge commits) - [x] Ran `clang-format` v8.0.0 (via `make format` or the Visual Studio extension) - [x] Compiles - [x] Ran all tests - [ ] If change impacts performance, include supporting evidence per the [performance document](https://github.com/stellar/stellar-core/blob/master/performance-eval/performance-eval.md)
2 parents 8a89650 + 687e2dd commit e82e15a

File tree

2 files changed

+103
-73
lines changed

2 files changed

+103
-73
lines changed

lib/binaryfusefilter.h

Lines changed: 74 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,9 @@ template <typename T> class binary_fuse_t
293293
uint32_t block = (1ul << blockBits);
294294

295295
std::vector<uint32_t> startPos(block);
296-
uint32_t h012[5];
296+
std::vector<uint32_t> h012(5);
297297

298-
reverseOrder[size] = 1;
298+
reverseOrder.at(size) = 1;
299299
for (int loop = 0; true; ++loop)
300300
{
301301
if (loop + 1 > XOR_MAX_ITERATIONS)
@@ -310,58 +310,58 @@ template <typename T> class binary_fuse_t
310310
{
311311
// important : i * size would overflow as a 32-bit number in
312312
// some cases.
313-
startPos[i] = ((uint64_t)i * size) >> blockBits;
313+
startPos.at(i) = ((uint64_t)i * size) >> blockBits;
314314
}
315315

316316
uint64_t maskblock = block - 1;
317317
for (uint32_t i = 0; i < size; i++)
318318
{
319-
uint64_t hash = sip_hash24(keys[i], Seed);
319+
uint64_t hash = sip_hash24(keys.at(i), Seed);
320320
uint64_t segment_index = hash >> (64 - blockBits);
321-
while (reverseOrder[startPos[segment_index]] != 0)
321+
while (reverseOrder.at(startPos.at(segment_index)) != 0)
322322
{
323323
segment_index++;
324324
segment_index &= maskblock;
325325
}
326-
reverseOrder[startPos[segment_index]] = hash;
327-
startPos[segment_index]++;
326+
reverseOrder.at(startPos.at(segment_index)) = hash;
327+
startPos.at(segment_index)++;
328328
}
329329
int error = 0;
330330
uint32_t duplicates = 0;
331331
for (uint32_t i = 0; i < size; i++)
332332
{
333-
uint64_t hash = reverseOrder[i];
333+
uint64_t hash = reverseOrder.at(i);
334334
uint32_t h0 = binary_fuse_hash(0, hash);
335-
t2count[h0] += 4;
336-
t2hash[h0] ^= hash;
335+
t2count.at(h0) += 4;
336+
t2hash.at(h0) ^= hash;
337337
uint32_t h1 = binary_fuse_hash(1, hash);
338-
t2count[h1] += 4;
339-
t2count[h1] ^= 1;
340-
t2hash[h1] ^= hash;
338+
t2count.at(h1) += 4;
339+
t2count.at(h1) ^= 1;
340+
t2hash.at(h1) ^= hash;
341341
uint32_t h2 = binary_fuse_hash(2, hash);
342-
t2count[h2] += 4;
343-
t2hash[h2] ^= hash;
344-
t2count[h2] ^= 2;
345-
if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0)
342+
t2count.at(h2) += 4;
343+
t2hash.at(h2) ^= hash;
344+
t2count.at(h2) ^= 2;
345+
if ((t2hash.at(h0) & t2hash.at(h1) & t2hash.at(h2)) == 0)
346346
{
347-
if (((t2hash[h0] == 0) && (t2count[h0] == 8)) ||
348-
((t2hash[h1] == 0) && (t2count[h1] == 8)) ||
349-
((t2hash[h2] == 0) && (t2count[h2] == 8)))
347+
if (((t2hash.at(h0) == 0) && (t2count.at(h0) == 8)) ||
348+
((t2hash.at(h1) == 0) && (t2count.at(h1) == 8)) ||
349+
((t2hash.at(h2) == 0) && (t2count.at(h2) == 8)))
350350
{
351351
duplicates += 1;
352-
t2count[h0] -= 4;
353-
t2hash[h0] ^= hash;
354-
t2count[h1] -= 4;
355-
t2count[h1] ^= 1;
356-
t2hash[h1] ^= hash;
357-
t2count[h2] -= 4;
358-
t2count[h2] ^= 2;
359-
t2hash[h2] ^= hash;
352+
t2count.at(h0) -= 4;
353+
t2hash.at(h0) ^= hash;
354+
t2count.at(h1) -= 4;
355+
t2count.at(h1) ^= 1;
356+
t2hash.at(h1) ^= hash;
357+
t2count.at(h2) -= 4;
358+
t2count.at(h2) ^= 2;
359+
t2hash.at(h2) ^= hash;
360360
}
361361
}
362-
error = (t2count[h0] < 4) ? 1 : error;
363-
error = (t2count[h1] < 4) ? 1 : error;
364-
error = (t2count[h2] < 4) ? 1 : error;
362+
error = (t2count.at(h0) < 4) ? 1 : error;
363+
error = (t2count.at(h1) < 4) ? 1 : error;
364+
error = (t2count.at(h2) < 4) ? 1 : error;
365365
}
366366
if (error)
367367
{
@@ -371,6 +371,9 @@ template <typename T> class binary_fuse_t
371371

372372
// Rotate seed deterministically
373373
auto seedIndex = loop / crypto_shorthash_KEYBYTES;
374+
375+
// Seed is a carray of size crypto_shorthash_KEYBYTES, can't
376+
// segfault
374377
Seed[seedIndex]++;
375378
}
376379

@@ -379,41 +382,41 @@ template <typename T> class binary_fuse_t
379382
// Add sets with one key to the queue.
380383
for (uint32_t i = 0; i < capacity; i++)
381384
{
382-
alone[Qsize] = i;
383-
Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0;
385+
alone.at(Qsize) = i;
386+
Qsize += ((t2count.at(i) >> 2) == 1) ? 1 : 0;
384387
}
385388
uint32_t stacksize = 0;
386389
while (Qsize > 0)
387390
{
388391
Qsize--;
389-
uint32_t index = alone[Qsize];
390-
if ((t2count[index] >> 2) == 1)
392+
uint32_t index = alone.at(Qsize);
393+
if ((t2count.at(index) >> 2) == 1)
391394
{
392-
uint64_t hash = t2hash[index];
393-
394-
// h012[0] = binary_fuse16_hash(0, hash);
395-
h012[1] = binary_fuse_hash(1, hash);
396-
h012[2] = binary_fuse_hash(2, hash);
397-
h012[3] = binary_fuse_hash(0, hash); // == h012[0];
398-
h012[4] = h012[1];
399-
uint8_t found = t2count[index] & 3;
400-
reverseH[stacksize] = found;
401-
reverseOrder[stacksize] = hash;
395+
uint64_t hash = t2hash.at(index);
396+
397+
// h012.at(0) = binary_fuse16_hash(0, hash);
398+
h012.at(1) = binary_fuse_hash(1, hash);
399+
h012.at(2) = binary_fuse_hash(2, hash);
400+
h012.at(3) = binary_fuse_hash(0, hash); // == h012.at(0);
401+
h012.at(4) = h012.at(1);
402+
uint8_t found = t2count.at(index) & 3;
403+
reverseH.at(stacksize) = found;
404+
reverseOrder.at(stacksize) = hash;
402405
stacksize++;
403-
uint32_t other_index1 = h012[found + 1];
404-
alone[Qsize] = other_index1;
405-
Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0);
406-
407-
t2count[other_index1] -= 4;
408-
t2count[other_index1] ^= binary_fuse_mod3(found + 1);
409-
t2hash[other_index1] ^= hash;
410-
411-
uint32_t other_index2 = h012[found + 2];
412-
alone[Qsize] = other_index2;
413-
Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0);
414-
t2count[other_index2] -= 4;
415-
t2count[other_index2] ^= binary_fuse_mod3(found + 2);
416-
t2hash[other_index2] ^= hash;
406+
uint32_t other_index1 = h012.at(found + 1);
407+
alone.at(Qsize) = other_index1;
408+
Qsize += ((t2count.at(other_index1) >> 2) == 2 ? 1 : 0);
409+
410+
t2count.at(other_index1) -= 4;
411+
t2count.at(other_index1) ^= binary_fuse_mod3(found + 1);
412+
t2hash.at(other_index1) ^= hash;
413+
414+
uint32_t other_index2 = h012.at(found + 2);
415+
alone.at(Qsize) = other_index2;
416+
Qsize += ((t2count.at(other_index2) >> 2) == 2 ? 1 : 0);
417+
t2count.at(other_index2) -= 4;
418+
t2count.at(other_index2) ^= binary_fuse_mod3(found + 2);
419+
t2hash.at(other_index2) ^= hash;
417420
}
418421
}
419422
if (stacksize + duplicates == size)
@@ -443,16 +446,16 @@ template <typename T> class binary_fuse_t
443446
for (uint32_t i = size - 1; i < size; i--)
444447
{
445448
// the hash of the key we insert next
446-
uint64_t hash = reverseOrder[i];
449+
uint64_t hash = reverseOrder.at(i);
447450
T xor2 = binary_fuse_fingerprint(hash);
448-
uint8_t found = reverseH[i];
449-
h012[0] = binary_fuse_hash(0, hash);
450-
h012[1] = binary_fuse_hash(1, hash);
451-
h012[2] = binary_fuse_hash(2, hash);
452-
h012[3] = h012[0];
453-
h012[4] = h012[1];
454-
Fingerprints[h012[found]] = xor2 ^ Fingerprints[h012[found + 1]] ^
455-
Fingerprints[h012[found + 2]];
451+
uint8_t found = reverseH.at(i);
452+
h012.at(0) = binary_fuse_hash(0, hash);
453+
h012.at(1) = binary_fuse_hash(1, hash);
454+
h012.at(2) = binary_fuse_hash(2, hash);
455+
h012.at(3) = h012.at(0);
456+
h012.at(4) = h012.at(1);
457+
Fingerprints.at(h012.at(found)) = xor2 ^ Fingerprints.at(h012.at(found + 1)) ^
458+
Fingerprints.at(h012.at(found + 2));
456459
}
457460

458461
return true;
@@ -519,7 +522,7 @@ template <typename T> class binary_fuse_t
519522

520523
for (auto byte_i = 0; byte_i < sizeof(T); ++byte_i)
521524
{
522-
value |= static_cast<T>(xdrFilter.fingerprints[pos + byte_i])
525+
value |= static_cast<T>(xdrFilter.fingerprints.at(pos + byte_i))
523526
<< (byte_i * 8);
524527
}
525528

@@ -535,8 +538,8 @@ template <typename T> class binary_fuse_t
535538
uint64_t hash = sip_hash24(key, Seed);
536539
T f = binary_fuse_fingerprint(hash);
537540
binary_hashes_t hashes = hash_batch(hash);
538-
f ^= Fingerprints[hashes.h0] ^ Fingerprints[hashes.h1] ^
539-
Fingerprints[hashes.h2];
541+
f ^= Fingerprints.at(hashes.h0) ^ Fingerprints.at(hashes.h1) ^
542+
Fingerprints.at(hashes.h2);
540543
return f == 0;
541544
}
542545

src/bucket/BucketIndexImpl.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,35 @@ BucketIndexImpl<IndexT>::BucketIndexImpl(BucketManager& bm,
229229
// Binary Fuse filter requires at least 2 elements
230230
if (keyHashes.size() > 1)
231231
{
232-
mData.filter =
233-
std::make_unique<BinaryFuseFilter16>(keyHashes, seed);
232+
// There is currently an access error that occurs very rarely
233+
// for some random seed values. If this occurs, simply rotate
234+
// the seed and try again.
235+
for (int i = 0; i < 10; ++i)
236+
{
237+
try
238+
{
239+
mData.filter = std::make_unique<BinaryFuseFilter16>(
240+
keyHashes, seed);
241+
}
242+
catch (std::out_of_range& e)
243+
{
244+
auto seedToStr = [](auto seed) {
245+
std::string result;
246+
for (auto b : seed)
247+
{
248+
fmt::format_to(std::back_inserter(result),
249+
"{:02x}", b);
250+
}
251+
return result;
252+
};
253+
254+
CLOG_ERROR(Bucket,
255+
"Bad memory access in BinaryFuseFilter with "
256+
"seed {}, retrying",
257+
seedToStr(seed));
258+
seed[0]++;
259+
}
260+
}
234261
}
235262
}
236263

0 commit comments

Comments
 (0)