TileDB-Inc
diff --git a/apis/python/src/tiledb/vector_search/module.cc b/apis/python/src/tiledb/vector_search/module.cc
Lines changed: 15 additions & 14 deletions
diff --git a/src/include/detail/flat/qv.h b/src/include/detail/flat/qv.h
Lines changed: 49 additions & 35 deletions
@@ -11,8 +11,6 @@
 namespace py = pybind11;
 using Ctx = tiledb::Context;
 
-bool global_debug = false;
-
 bool enable_stats = false;
 std::vector<json> core_stats;
 
@@ -126,7 +124,8 @@ static void declare_qv_query_heap_infinite_ram(py::module& m, const std::string&
          size_t k_nn,
          size_t nthreads) -> py::tuple { //std::pair<ColMajorMatrix<float>, ColMajorMatrix<size_t>> { // TODO change return type
 
-        auto r = detail::ivf::qv_query_heap_infinite_ram(
+        // auto r = detail::ivf::qv_query_heap_infinite_ram(
+        auto r = detail::ivf::query_infinite_ram(
             parts,
             centroids,
             query_vectors,
@@ -178,7 +177,7 @@ static void declare_nuv_query_heap_infinite_ram(py::module& m, const std::string
          std::vector<Id_Type>& ids,
          size_t nprobe,
          size_t k_nn,
-         size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> { // TODO change return type
+         size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> { // TODO change return type
 
         auto r = detail::ivf::nuv_query_heap_infinite_ram_reg_blocked(
             parts,
@@ -205,7 +204,7 @@ static void declare_nuv_query_heap_finite_ram(py::module& m, const std::string&
          size_t nprobe,
          size_t k_nn,
          size_t upper_bound,
-         size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> { // TODO change return type
+         size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> { // TODO change return type
 
         auto r = detail::ivf::nuv_query_heap_finite_ram_reg_blocked<T, Id_Type>(
             ctx,
@@ -280,7 +279,7 @@ static void declare_ivf_index_tdb(py::module& m, const std::string& suffix) {
         }, py::keep_alive<1,2>());
 }
 
-template <class T=float, class U=size_t>
+template <class T=float, class U=uint64_t>
 static void declareFixedMinPairHeap(py::module& mod) {
   using PyFixedMinPairHeap = py::class_<fixed_min_pair_heap<T, U>>;
   PyFixedMinPairHeap cls(mod, "FixedMinPairHeap", py::buffer_protocol());
@@ -357,7 +356,7 @@ void declareStdVector(py::module& m, const std::string& suffix) {
     });
 }
 
-template <typename T, typename indices_type = size_t>
+template <typename T, typename indices_type = uint64_t>
 void declarePartitionIvfIndex(py::module& m, const std::string& suffix) {
   m.def(("partition_ivf_index_" + suffix).c_str(),
         [](ColMajorMatrix<float>& centroids,
@@ -401,8 +400,8 @@ static void declare_vq_query_heap(py::module& m, const std::string& suffix) {
            ColMajorMatrix<float>& query_vectors,
            const std::vector<uint64_t> &ids,
            int k,
-           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> {
-          auto r = detail::flat::vq_query_heap<tdbColMajorMatrix<T>, ColMajorMatrix<float>, uint64_t>(data, query_vectors, ids, k, nthreads);
+           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
+          auto r = detail::flat::vq_query_heap(data, query_vectors, ids, k, nthreads);
           return r;
         });
 }
@@ -414,8 +413,8 @@ static void declare_vq_query_heap_pyarray(py::module& m, const std::string& suff
            ColMajorMatrix<float>& query_vectors,
            const std::vector<uint64_t> &ids,
            int k,
-           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> {
-          auto r = detail::flat::vq_query_heap<ColMajorMatrix<T>, ColMajorMatrix<float>, uint64_t>(data, query_vectors, ids, k, nthreads);
+           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
+          auto r = detail::flat::vq_query_heap(data, query_vectors, ids, k, nthreads);
           return r;
         });
 }
@@ -497,7 +496,7 @@ PYBIND11_MODULE(_tiledbvspy, m) {
         [](ColMajorMatrix<float>& data,
            ColMajorMatrix<float>& query_vectors,
            int k,
-           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> {
+           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
           auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
           return r;
         });
@@ -506,13 +505,13 @@ PYBIND11_MODULE(_tiledbvspy, m) {
         [](tdbColMajorMatrix<uint8_t>& data,
            ColMajorMatrix<float>& query_vectors,
            int k,
-           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<size_t>> {
+           size_t nthreads) -> std::tuple<ColMajorMatrix<float>, ColMajorMatrix<uint64_t>> {
           auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
           return r;
         });
 
   m.def("validate_top_k_u64",
-      [](const ColMajorMatrix<size_t>& top_k,
+      [](const ColMajorMatrix<uint64_t>& top_k,
          const ColMajorMatrix<int32_t>& ground_truth) -> bool {
         return validate_top_k(top_k, ground_truth);
       });
@@ -535,9 +534,11 @@ PYBIND11_MODULE(_tiledbvspy, m) {
     return json{core_stats}.dump();
   });
 
+#if 0
   m.def("set_debug", [](bool debug) {
     global_debug = debug;
   });
+#endif
 
   declare_vq_query_heap<uint8_t>(m, "u8");
   declare_vq_query_heap<float>(m, "f32");
 
@@ -73,10 +73,13 @@ namespace detail::flat {
  */
 template <class DB, class Q>
 [[deprecated]] auto qv_query_heap_0(
-    DB& db, const Q& q, int k_nn, unsigned int nthreads) {
+    const DB& db, const Q& q, int k_nn, unsigned int nthreads) {
   scoped_timer _{tdb_func__};
 
-  ColMajorMatrix<size_t> top_k(k_nn, size(q));
+  using id_type = size_t;
+  using score_type = float;
+
+  ColMajorMatrix<id_type> top_k(k_nn, size(q));
 
   auto par = stdx::execution::indexed_parallel_policy{nthreads};
   stdx::range_for_each(
@@ -85,7 +88,7 @@ template <class DB, class Q>
         size_t size_db = size(db);
 
         // @todo can we do this more efficiently?
-        Vector<float> scores(size_db);
+        Vector<score_type> scores(size_db);
 
         for (size_t i = 0; i < size_db; ++i) {
           scores[i] = L2(q_vec, db[i]);
@@ -113,40 +116,44 @@ template <class DB, class Q>
  * @return A matrix of size k x #queries containing the top k results for each
  * query.
  */
-template <class T, class DB, class Q, class Index>
+template <class T, class DB, class Q, class ID>
 auto qv_query_heap(
     T,
-    DB& db,
-    Q& q,
-    const std::vector<Index>& ids,
+    const DB& db,
+    const Q& q,
+    const ID& ids,
     int k_nn,
     unsigned nthreads);
 
 template <class DB, class Q>
-auto qv_query_heap(DB& db, Q& q, int k_nn, unsigned nthreads) {
+auto qv_query_heap(const DB& db, const Q& q, int k_nn, unsigned nthreads) {
   return qv_query_heap(
-      without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
+      without_ids{}, db, q, std::vector<uint64_t>{}, k_nn, nthreads);
 }
 
-template <class DB, class Q, class Index>
+template <class DB, class Q, class ID>
 auto qv_query_heap(
-    DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
+    const DB& db, const Q& q, const ID& ids, int k_nn, unsigned nthreads) {
   return qv_query_heap(with_ids{}, db, q, ids, k_nn, nthreads);
 }
 
 // @todo Add to out of core
-template <class T, class DB, class Q, class Index>
+template <class T, class DB, class Q, class ID>
 auto qv_query_heap(
     T,
-    DB& db,
-    Q& query,
-    const std::vector<Index>& ids,
+    const DB& db,
+    const Q& query,
+    const ID& ids,
     int k_nn,
     unsigned nthreads) {
   scoped_timer _{tdb_func__};
 
-  auto top_k = ColMajorMatrix<size_t>(k_nn, query.num_cols());
-  auto top_k_scores = ColMajorMatrix<float>(k_nn, query.num_cols());
+  // using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
+  using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
+  using score_type = float;
+
+  auto top_k = ColMajorMatrix<id_type>(k_nn, query.num_cols());
+  auto top_k_scores = ColMajorMatrix<score_type>(k_nn, query.num_cols());
 
   // Have to do explicit asynchronous threading here, as the current parallel
   // algorithms have iterator-based interaces, and the `Matrix` class does not
@@ -159,7 +166,7 @@ auto qv_query_heap(
       std::move(par),
       query,
       [&, size_db](auto&& q_vec, auto&& n = 0, auto&& j = 0) {
-        fixed_min_pair_heap<float, size_t> min_scores(k_nn);
+        fixed_min_pair_heap<score_type, id_type> min_scores(k_nn);
 
         for (size_t i = 0; i < size_db; ++i) {
           auto score = L2(q_vec, db[i]);
@@ -190,35 +197,40 @@ auto qv_query_heap(
  * @return A matrix of size k x #queries containing the top k results for each
  * query.
  */
-template <class T, class DB, class Q, class Index>
+template <class T, class DB, class Q, class ID>
 auto qv_query_heap_tiled(
     T,
     DB& db,
-    Q& q,
-    const std::vector<Index>& ids,
+    const Q& q,
+    const ID& ids,
     int k_nn,
     unsigned nthreads);
 
 template <class DB, class Q>
-auto qv_query_heap_tiled(DB& db, Q& q, int k_nn, unsigned nthreads) {
+auto qv_query_heap_tiled(DB& db, const Q& q, int k_nn, unsigned nthreads) {
   return qv_query_heap_tiled(
-      without_ids{}, db, q, std::vector<size_t>{}, k_nn, nthreads);
+      without_ids{}, db, q, std::vector<uint64_t>{}, k_nn, nthreads);
 }
 
-template <class DB, class Q, class Index>
+template <class DB, class Q, class ID>
 auto qv_query_heap_tiled(
-    DB& db, Q& q, const std::vector<Index>& ids, int k_nn, unsigned nthreads) {
+    DB& db, Q& q, const ID& ids, int k_nn, unsigned nthreads) {
   return qv_query_heap_tiled(with_ids{}, db, q, ids, k_nn, nthreads);
 }
 
-template <class T, class DB, class Q, class Index>
+template <class T, class DB, class Q, class ID>
 auto qv_query_heap_tiled(
     T,
     DB& db,
-    Q& query,
-    [[maybe_unused]] const std::vector<Index>& ids,
+    const Q& query,
+    [[maybe_unused]] const ID& ids,
     int k_nn,
     unsigned nthreads) {
+
+  // using feature_type = typename std::remove_reference_t<decltype(db)>::value_type;
+  using id_type = typename std::remove_reference_t<decltype(ids)>::value_type;
+  using score_type = float;
+
   if constexpr (is_loadable_v<decltype(db)>) {
     db.load();
   }
@@ -236,8 +248,8 @@ auto qv_query_heap_tiled(
   std::vector<std::future<void>> futs;
   futs.reserve(nthreads);
 
-  auto min_scores = std::vector<fixed_min_pair_heap<float, size_t>>(
-      size(query), fixed_min_pair_heap<float, size_t>(k_nn));
+  auto min_scores = std::vector<fixed_min_pair_heap<score_type, id_type>>(
+      size(query), fixed_min_pair_heap<score_type, id_type>(k_nn));
 
   // @todo: Use range::for_each
   for (size_t n = 0; n < nthreads; ++n) {
@@ -251,8 +263,8 @@ auto qv_query_heap_tiled(
             auto len = 2 * ((stop - start) / 2);
             auto end = start + len;
 
-            // auto min_scores0 = fixed_min_pair_heap<float, size_t> (k);
-            // auto min_scores1 = fixed_min_pair_heap<float, size_t> (k);
+            // auto min_scores0 = fixed_min_pair_heap<score_type, id_type> (k);
+            // auto min_scores1 = fixed_min_pair_heap<score_type, id_type> (k);
 
             for (auto j = start; j != end; j += 2) {
               auto j0 = j + 0;
@@ -365,15 +377,17 @@ template <class DB, class Q>
 auto qv_partition(const DB& db, const Q& q, unsigned nthreads) {
   scoped_timer _{tdb_func__};
 
+  // Just need a single vector -- creating an index, not ids, so hardcoded size_t is okay to use here
+  using id_type = size_t;
+  using score_type = float;
   auto size_db = size(db);
 
-  // Just need a single vector
-  std::vector<size_t> top_k(q.num_cols());
+  std::vector<id_type> top_k(q.num_cols());
 
   auto par = stdx::execution::indexed_parallel_policy{(size_t)nthreads};
   stdx::range_for_each(
       std::move(par), q, [&, size_db](auto&& qvec, auto&& n = 0, auto&& j = 0) {
-        float min_score = std::numeric_limits<float>::max();
+        score_type min_score = std::numeric_limits<score_type>::max();
         size_t idx = 0;
 
         for (size_t i = 0; i < size_db; ++i) {