Add Morton filters

thomasmueller · thomasmueller · commit 9db3b57a4dfa · 2019-04-25T17:04:02.000+02:00
diff --git a/benchmarks/bulk-insert-and-query.cc b/benchmarks/bulk-insert-and-query.cc
@@ -19,6 +19,10 @@
 #include <set>
 #include <stdio.h>
 
+// morton
+#include "compressed_cuckoo_filter.h"
+#include "morton_sample_configs.h"
+
 #include "cuckoofilter.h"
 #include "cuckoofilter_stable.h"
 #include "xorfilter.h"
@@ -53,6 +57,7 @@ using namespace xorfilter_plus;
 using namespace bloomfilter;
 using namespace counting_bloomfilter;
 using namespace gcsfilter;
+using namespace CompressedCuckoo; // Morton filter namespace
 #ifdef __AVX2__
 using namespace gqfilter;
 #endif
@@ -308,6 +313,52 @@ struct FilterAPI<XorFilter<ItemType, FingerprintType>> {
   }
 };
 
+// Benchmark adapter that owns one heap-allocated Morton3_8 filter.
+class MortonFilter {
+    Morton3_8* filter;
+    size_t size;  // item count requested at construction (not a byte count)
+public:
+    // Morton3_8 is constructed with the argument (2.1 * size) + 64.
+    MortonFilter(const size_t size)
+        : filter(new Morton3_8((size_t) (2.1 * size) + 64)), size(size) {}
+    // Sole owner of `filter`: a copy would delete it twice, so only moves are allowed.
+    MortonFilter(const MortonFilter&) = delete;
+    MortonFilter& operator=(const MortonFilter&) = delete;
+    MortonFilter(MortonFilter&& other) noexcept
+        : filter(other.filter), size(other.size) { other.filter = nullptr; }
+    ~MortonFilter() { delete filter; }
+    void Add(uint64_t key) { filter->insert(key); }
+    // Key is taken by value, so const keys and temporaries are accepted as well.
+    bool Contain(uint64_t item) { return filter->likely_contains(item); }
+    size_t SizeInBytes() const {
+        // TODO don't know how to get / calculate it
+        return size;  // placeholder: this is the requested item count
+    }
+};
+
+// Adapts MortonFilter to the FilterAPI interface the other filters in this file use.
+template<>
+struct FilterAPI<MortonFilter> {
+    using Table = MortonFilter;
+    static Table ConstructFromAddCount(size_t add_count) { return Table(add_count); }
+    static void Add(uint64_t key, Table* table) { table->Add(key); }
+    // Inserts keys[start, end). `keys` is a const reference so the key set is not
+    // copied on every call; the index is size_t to match the type of the bounds.
+    static void AddAll(const vector<uint64_t>& keys, const size_t start, const size_t end, Table* table) {
+        for (size_t i = start; i < end; i++) {
+            table->Add(keys[i]);
+        }
+    }
+    // Removal is not offered by the MortonFilter wrapper; always throws.
+    static void Remove(uint64_t key, Table * table) {
+        throw std::runtime_error("Unsupported");
+    }
+    CONTAIN_ATTRIBUTES static bool Contain(uint64_t key, Table * table) {
+        return table->Contain(key);
+    }
+};
+
+
 class XorSingle {
     xor8_s filter;
 public:
@@ -317,7 +368,7 @@ class XorSingle {
         }
     }
     ~XorSingle() {
-        ::xor8_free(&filter);
+        xor8_free(&filter);
     }
     bool AddAll(const uint64_t* data, const size_t start, const size_t end) {
         return xor8_buffered_populate(data + start, end - start, &filter);
@@ -887,6 +938,7 @@ int main(int argc, char * argv[]) {
     {62, "SuccCountBlockBloom10"},
 
     {70, "Xor8-singleheader"},
+    {80, "Morton"},
 
     // Sort
     {100, "Sort"},
@@ -1345,6 +1397,14 @@ int main(int argc, char * argv[]) {
       cout << setw(NAME_WIDTH) << names[a] << cf << endl;
   }
 
+  a = 80;
+  if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
+      auto cf = FilterBenchmark<
+          MortonFilter>(
+          add_count, to_add, distinct_add, to_lookup, distinct_lookup, intersectionsize, hasduplicates, mixed_sets, seed, true);
+      cout << setw(NAME_WIDTH) << names[a] << cf << endl;
+  }
+
   // Sort ----------------------------------------------------------
   a = 100;
   if (algorithmId == a || algorithmId < 0 || (algos.find(a) != algos.end())) {
diff --git a/src/morton/compressed_cuckoo_config.h b/src/morton/compressed_cuckoo_config.h
@@ -0,0 +1,103 @@
+/*
+Copyright (c) 2019 Advanced Micro Devices, Inc.
+ 
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Author: Alex D. Breslow 
+        Advanced Micro Devices, Inc.
+        AMD Research
+*/
+#ifndef _COMPRESSED_CUCKOO_CONFIG_H
+#define _COMPRESSED_CUCKOO_CONFIG_H
+
+namespace CompressedCuckoo{
+  // Configuration constants and option enums.  See vector_types.h for more types and for tuning the vector width and atom types.
+  // NOTE(review): nothing is #included here; size_t, the uintN_t types and atom_t must already be declared by the includer.
+  const bool g_cache_aligned_allocate = true;
+  const size_t g_cache_line_size_bytes = 64;  // Change this as necessary
+  const uint64_t stash_prefix_tag_len = 4;
+  
+  // Counter width in bits: an 8-bit counter allows for up to 255 items per block
+  const uint8_t max_fullness_counter_width = 8;
+  constexpr atom_t one = static_cast<atom_t>(1);  // the constant 1 typed as atom_t
+  
+  enum struct AlternateBucketSelectionMethodEnum{
+    TABLE_BASED_OFFSET,
+    FUNCTION_BASED_OFFSET,
+    FAN_ET_AL_PARTIAL_KEY // Only use this if you can guarantee the total buckets
+                          // in the filter is a power of two
+  };
+
+  enum struct InsertionMethodEnum{
+    FIRST_FIT,
+    TWO_CHOICE,
+    HYBRID_SIMPLE,
+    HYBRID_PIECEWISE, // Starts off as first-fit and then transitions to two choice
+                      // once you hit a certain load factor
+    FIRST_FIT_OPT, // Transitions between two implementations of first-fit
+  };
+ 
+  enum struct CounterReadMethodEnum{
+    READ_SIMPLE,
+    READ_CROSS,
+    READ_RAW,  // If counters are always in atom 0 of block 0, just read that.
+               // NOTE: This is prone to bugs if you rearrange the storage
+               // of the counters within a block, so beware.
+    READ_RAW128 // Read the first 128 bits from the block.  See the NOTE on READ_RAW.
+  }; 
+
+  enum struct FingerprintReadMethodEnum{
+    READ_SIMPLE,
+    READ_CROSS,
+    READ_BYTE  // Special optimization for 8-bit fingerprints that are byte
+               // aligned
+    // RAW reads don't make sense here. We don't statically know which atom
+    // needs to be read.  It may make sense with 128-bit atoms, but
+    // my benchmarking showed 64-bit atoms to be faster.
+  };
+
+  enum struct FingerprintComparisonMethodEnum{
+    VARIABLE_COUNT,
+    FIXED_COUNT_AGGRESSIVE,
+    SEMI_FIXED
+  };
+
+  enum struct ReductionMethodEnum{
+    POP_CNT, // Only use this when the counters fit into a single atom
+    PARALLEL_REDUCE,
+    NAIVE_FULL_EXCLUSIVE_SCAN,
+  };
+
+  enum struct OverflowTrackingArrayHashingMethodEnum{
+    // Daniel Lemire's fast hashing method
+    LEMIRE_FINGERPRINT_MULTIPLY,
+    RAW_BUCKET_HASH,
+    CLUSTERED_BUCKET_HASH,
+  };
+
+  enum struct InsertStatus{
+    FAILED_TO_INSERT = 0,
+    PLACED_IN_PRIMARY_BUCKET = 1,
+    PLACED_IN_SECONDARY_BUCKET = 2
+  };
+
+
+} // End of CompressedCuckoo namespace
+
+#endif
diff --git a/src/morton/test_util.h b/src/morton/test_util.h
@@ -0,0 +1,35 @@
+/*
+Copyright (c) 2019 Advanced Micro Devices, Inc.
+ 
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Author: Alex D. Breslow 
+        Advanced Micro Devices, Inc.
+        AMD Research
+*/
+#ifndef _TEST_UTIL_H
+#define _TEST_UTIL_H
+
+namespace Test{
+  // Maps a test outcome to "SUCCESS" or "FAILURE".  Marked inline because a plain
+  // function defined in a header is multiply defined once two TUs include it.
+  inline std::string pass(bool success_status){ return success_status ? "SUCCESS" : "FAILURE"; }
+}
+
+#endif
diff --git a/src/morton/util.h b/src/morton/util.h
@@ -0,0 +1,135 @@
+/*
+Copyright (c) 2019 Advanced Micro Devices, Inc.
+ 
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+ 
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+Author: Alex D. Breslow 
+        Advanced Micro Devices, Inc.
+        AMD Research
+*/
+#ifndef _UTIL_H
+#define _UTIL_H
+
+#include <cstdint>
+#include <string>
+#include <sstream>
+#include <cmath>
+
+#include <iostream>
+
+#include "vector_types.h"
+
+// x86intrin.h provides the BMI2 pdep instruction; the include below is guarded
+// so it is only pulled in when the compiler targets BMI2.
+#ifdef __BMI2__
+#include "x86intrin.h"
+#endif
+
+namespace util{
+
+  // Binary digits of `integer`, MSB first, with a space after every bit whose index
+  // is a multiple of `spacing` (bit 0 too); spacing == 0 emits no spaces (was a div by 0).
+  template<class INT_TYPE>
+  inline std::string bin_string(INT_TYPE integer, uint32_t spacing){
+    std::string bits;
+    for(int32_t i = sizeof(integer) * 8 - 1; i > -1; i--){
+      bits += ((integer >> i) & 1) ? '1' : '0';
+      if(spacing != 0 && i % spacing == 0) bits += ' ';
+    }
+    return bits;
+  }
+
+  // Overload that passes the full bit width of INT_TYPE as the spacing.
+  template<class INT_TYPE> inline std::string bin_string(INT_TYPE integer){
+    return bin_string<INT_TYPE>(integer, 8 * sizeof(INT_TYPE));
+  }
+
+  // Ceiling of log2(integer), derived from the leading-zero count of integer - 1.
+  // integer == 1 is special-cased to 0 because __builtin_clz(0) is undefined;
+  // integer == 0 wraps to 0xFFFFFFFF and therefore yields 32.
+  constexpr inline uint32_t log2ceil(uint32_t integer){
+    return integer == 1u ? 0u : 32u - __builtin_clz(integer - 1u);
+  }
+
+  // See https://lemire.me/blog/2016/06/27
+  // These functions implement a fast alternative to the modulo reduction.
+  // The algorithm is presented by Professor Daniel Lemire of the University
+  // of Quebec in his outstanding blog, which is under a Creative Commons 
+  // Attribution-ShareAlike 3.0 Unported License. 
+  // See https://creativecommons.org/licenses/by-sa/3.0/us/ and
+  // https://lemire.me/blog/terms-of-use/.
+  // Returns (raw_hash * modulus) >> hash_width_in_bits; the product is formed in a
+  // double-width integer (standard uint64_t, not glibc's __uint64_t, for 32-bit).
+  template<typename T>
+  inline T fast_mod_alternative(T raw_hash, T modulus, T hash_width_in_bits);
+
+  template<>
+  inline uint64_t fast_mod_alternative<uint64_t>(uint64_t raw_hash, uint64_t modulus, uint64_t hash_width_in_bits){
+    return (static_cast<__uint128_t>(raw_hash) * modulus) >> hash_width_in_bits;
+  }
+
+  template<>
+  inline uint32_t fast_mod_alternative<uint32_t>(uint32_t raw_hash, uint32_t modulus, uint32_t hash_width_in_bits){
+    return (static_cast<uint64_t>(raw_hash) * modulus) >> hash_width_in_bits;
+  }
+
+  template<class TN, class T>  // per the specialization in this file: TN = vN_u32 (indexable), T = uint32_t
+  inline TN fast_mod_alternativeN(TN raw_hashes, T modulus);  // declaration only; bodies come from explicit specializations
+
+	// Prints `name`, then the first batch_size elements of `array`, each cast to
+	// uint32_t and followed by a space, wrapped in "[ ... ]" on std::cout.
+	template<class ARRAY_TYPE>
+	inline void print_array(const std::string& name, const ARRAY_TYPE& array){
+		std::cout << name << " [ ";
+		for(uint32_t idx = 0; idx < batch_size; ++idx) std::cout << static_cast<uint32_t>(array[idx]) << " ";
+		std::cout << "]\n";
+	}
+
+  // vN_u32 specialization: each of the _N lanes becomes (lane * modulus) >> 32.
+  template<> inline vN_u32 fast_mod_alternativeN<vN_u32, uint32_t>(vN_u32 raw_hashes, uint32_t modulus){
+    static_assert(_N <= 8, "Vector width exceeds AVX/AVX2's 256-bit vector width\n");
+    for(uint32_t lane = 0; lane < _N; ++lane){
+      raw_hashes[lane] = static_cast<uint32_t>((static_cast<__uint64_t>(raw_hashes[lane]) * modulus) >> 32U);
+    }
+    return raw_hashes;
+  }
+
+} // End of util namespace
+
+
+// Writes `integer` to `os` in decimal without leading zeros (zero prints "0").
+// Declared inline: a non-inline definition in a header is multiply defined as
+// soon as two translation units include it.
+inline std::ostream& operator<<(std::ostream& os, __uint128_t integer){
+  const __uint128_t sqrt_power10 = static_cast<__uint128_t>(10000000000000000000ull);
+  // 2^128 is about 3.4 * 10^38, so the most significant digit has weight 10^38.
+  __uint128_t power10 = sqrt_power10 * sqrt_power10;
+  // Skip leading zeros but stop at the ones place so power10 is never 0 in a division.
+  while(power10 > 1 && integer / power10 == 0) power10 /= 10;
+  while(power10 != 0){
+    const uint32_t digit = static_cast<uint32_t>(integer / power10);
+    os << digit;
+    integer -= power10 * digit;
+    power10 /= 10;
+  }
+  return os;
+}
+
+
+#endif