TileDB-Inc
diff --git a/apis/python/src/tiledb/vector_search/module.cc b/apis/python/src/tiledb/vector_search/module.cc
Lines changed: 12 additions & 8 deletions
diff --git a/apis/python/src/tiledb/vector_search/module.py b/apis/python/src/tiledb/vector_search/module.py
Lines changed: 7 additions & 5 deletions
diff --git a/apis/python/test/test_api.py b/apis/python/test/test_api.py
Lines changed: 1 addition & 1 deletion
diff --git a/apis/python/test/test_module.py b/apis/python/test/test_module.py
Lines changed: 1 addition & 0 deletions
diff --git a/src/benchmarks/setup.bash b/src/benchmarks/setup.bash
Lines changed: 10 additions & 8 deletions
diff --git a/src/cmake/.Superbuild.cmake.swo b/src/cmake/.Superbuild.cmake.swo
4 KB
diff --git a/src/include/defs.h b/src/include/defs.h
Lines changed: 3 additions & 3 deletions
diff --git a/src/include/detail/flat/gemm.h b/src/include/detail/flat/gemm.h
Lines changed: 10 additions & 60 deletions
diff --git a/src/include/detail/flat/qv.h b/src/include/detail/flat/qv.h
Lines changed: 4 additions & 4 deletions
@@ -187,6 +187,10 @@ static void declareColMajorMatrixSubclass(py::module& mod,
   // TODO auto-namify
   PyTMatrix cls(mod, (name + suffix).c_str(), py::buffer_protocol());
   cls.def(py::init<const Ctx&, std::string, size_t>(),  py::keep_alive<1,2>());
+
+  if constexpr (std::is_same<P, tdbColMajorMatrix<T>>::value) {
+    cls.def("load", &TMatrix::load);
+  }
 }
 
 template <typename T>
@@ -277,22 +281,22 @@ PYBIND11_MODULE(_tiledbvspy, m) {
   /* Query API */
 
   m.def("query_vq_f32",
-        [](const ColMajorMatrix<float>& data,
-           const ColMajorMatrix<float>& query_vectors,
+        [](ColMajorMatrix<float>& data,
+           ColMajorMatrix<float>& query_vectors,
            int k,
            bool nth,
-           size_t nthreads) -> ColMajorMatrix<uint64_t> {
-          auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
+           size_t nthreads) -> ColMajorMatrix<size_t> {
+          auto r = detail::flat::vq_query_nth(data, query_vectors, k, true, nthreads);
           return r;
         });
 
   m.def("query_vq_u8",
-        [](const ColMajorMatrix<uint8_t>& data,
-           const ColMajorMatrix<float>& query_vectors,
+        [](tdbColMajorMatrix<uint8_t>& data,
+           ColMajorMatrix<float>& query_vectors,
            int k,
            bool nth,
-           size_t nthreads) -> ColMajorMatrix<uint64_t> {
-          auto r = detail::flat::vq_query_heap(data, query_vectors, k, nthreads);
+           size_t nthreads) -> ColMajorMatrix<size_t> {
+          auto r = detail::flat::vq_query_nth(data, query_vectors, k, true, nthreads);
           return r;
         });
 
 
@@ -24,19 +24,21 @@ def load_as_matrix(path: str, nqueries: int = 0, config: Dict = {}):
     a = tiledb.ArraySchema.load(path)
     dtype = a.attr(0).dtype
     if dtype == np.float32:
-        return tdbColMajorMatrix_f32(ctx, path, nqueries)
+        m = tdbColMajorMatrix_f32(ctx, path, nqueries)
     elif dtype == np.float64:
-        return tdbColMajorMatrix_f64(ctx, path, nqueries)
+        m = tdbColMajorMatrix_f64(ctx, path, nqueries)
     elif dtype == np.int32:
-        return tdbColMajorMatrix_i32(ctx, path, nqueries)
+        m = tdbColMajorMatrix_i32(ctx, path, nqueries)
-    elif dtype == np.int32:
-        return tdbColMajorMatrix_i64(ctx, path, nqueries)
+    elif dtype == np.int64:
+        m = tdbColMajorMatrix_i64(ctx, path, nqueries)
     elif dtype == np.uint8:
-        return tdbColMajorMatrix_u8(ctx, path, nqueries)
+        m = tdbColMajorMatrix_u8(ctx, path, nqueries)
     # elif dtype == np.uint64:
     #     return tdbColMajorMatrix_u64(ctx, path, nqueries)
     else:
         raise ValueError("Unsupported Matrix dtype: {}".format(a.attr(0).dtype))
+    m.load()
+    return m
 
 
 def load_as_array(path, return_matrix: bool = False, config: Dict = {}):
 
@@ -50,7 +50,7 @@ def test_flat_query():
 
     r = vs.query_vq(db, targets, k, nqueries, 8)  # k  # nqueries  # nthreads
 
-    ra = np.array(r, copy=False)
+    ra = np.array(r, copy=True)
     print(ra)
     print(ra.shape)
 
 
@@ -13,6 +13,7 @@ def test_tdbMatrix(tmpdir):
 
     ctx = vspy.Ctx({})
     m = vspy.tdbColMajorMatrix_f32(ctx, p, 0)
+    m.load()
     m_array = np.array(m)
     assert m_array.shape == data.shape
     assert np.array_equal(m_array, data)
 
@@ -1,9 +1,11 @@
 #!/bin/bash
 
-ec2_ivf_flat="/home/lums/feature-vector-prototype/src/cmake-build-release/src/ivf_flat"
-m1_ivf_flat="/Users/lums/TileDB/feature-vector-prototype/src/cmake-build-release/src/ivf_flat"
-ec2_flat="/home/lums/feature-vector-prototype/src/cmake-build-release/src/flat"
-m1_flat="/Users/lums/TileDB/feature-vector-prototype/src/cmake-build-release/src/flat"
+ec2_ivf_flat="/home/lums/TileDB-Vector-Search/src/cmake-build-release/libtiledbvectorsearch/src/ivf_flat"
+m1_ivf_flat="/Users/lums/TileDB/TileDB-Vector-Search/src/cmake-build-release/src/ivf_flat"
+ec2_flat="/home/lums/TileDB-Vector-Search/src/cmake-build-release/src/flat_l2"
+m1_flat="/Users/lums/TileDB/TileDB-Vector-Search/src/cmake-build-release/src/flat_l2"
+
+
 
 if [ -f "${ivf_query}" ]; then
     ivf_query="${ivf_query}"
@@ -25,11 +27,11 @@ else
     echo "Neither flat executable file exists"
 fi
 
-# gp3_root=/home/lums/feature-vector-prototype/external/data/gp3
+# gp3_root=/home/lums/TileDB-Vector-Search/external/data/gp3
 nvme_root=/mnt/ssd
 
-ec2_root="/home/lums/feature-vector-prototype/external/data/gp3"
-m1_root="/Users/lums/TileDB/feature-vector-prototype/external/data/gp3"
+ec2_root="/home/lums/TileDB-Vector-Search/external/data/gp3"
+m1_root="/Users/lums/TileDB/TileDB-Vector-Search/external/data/gp3"
 
 if [ -d "${gp3_root}" ]; then
     gp3_root=${gp3_root}
@@ -418,7 +420,7 @@ function ivf_query() {
 		shift 2
 		;;
 	    --cluster|--nprobe)
-		local _cluster="--cluster ${2}"
+		local _cluster="--nprobe ${2}"
 		shift 2
 		;;
 	    --block|--blocksize)
 
@@ -137,9 +137,9 @@ auto mat_col_sum(
   auto num_cols = m.num_cols();
   auto num_rows = m.num_rows();
 
-  for (int j = 0; j < num_cols; ++j) {
+  for (size_t j = 0; j < num_cols; ++j) {
     decltype(v[0]) vj = v[j];
-    for (int i = 0; i < num_rows; ++i) {
+    for (size_t i = 0; i < num_rows; ++i) {
       vj += f(m(i, j));
     }
     v[j] = vj;
@@ -238,7 +238,7 @@ auto get_top_k(const S& scores, int k, bool nth, int nthreads) {
 
   auto num_queries = scores.num_cols();
 
-  auto top_k = ColMajorMatrix<uint64_t>(k, num_queries);
+  auto top_k = ColMajorMatrix<size_t>(k, num_queries);
 
   int q_block_size = (num_queries + nthreads - 1) / nthreads;
   std::vector<std::future<void>> futs;
 
@@ -53,64 +53,29 @@ using namespace std::chrono_literals;
 
 template <class DB, class Q>
 auto blocked_gemm_query(DB& db, Q& q, int k, bool nth, size_t nthreads) {
-  scoped_timer _{"Total time " + tdb_func__};
+  scoped_timer _{tdb_func__};
 
   using element = std::pair<float, unsigned>;
 
-  // @todo constexpr block_db and block_q
-  auto block_db = db.is_blocked();
-  auto block_q = q.is_blocked();
-  auto async_db = block_db && db.is_async();
-  auto async_q = block_q && q.is_async();
-  if (block_db && block_q) {
-    throw std::runtime_error("Can't block both db and q");
-  }
-
   ColMajorMatrix<float> scores(db.num_cols(), q.num_cols());
 
   std::vector<fixed_min_heap<element>> min_scores(
       size(q), fixed_min_heap<element>(k));
 
-  for (;;) {
-    if (async_db) {
-      db.advance_async();
-    }
-    if (async_q) {
-      q.advance_async();
-    }
+  while (db.load()) {
     gemm_scores(db, q, scores, nthreads);
 
     auto par = stdx::execution::indexed_parallel_policy{nthreads};
     stdx::range_for_each(
         std::move(par), scores, [&](auto&& q_vec, auto&& n = 0, auto&& i = 0) {
-          if (block_db) {
-            for (int j = 0; j < scores.num_rows(); ++j) {
-              min_scores[i].insert({scores(j, i), j + db.offset()});
-            }
-          } else if (block_q) {
-            for (int j = 0; j < scores.num_rows(); ++j) {
-              min_scores[i + q.offset()].insert({scores(j, i), j});
-            }
-          } else {
-            for (int j = 0; j < scores.num_rows(); ++j) {
-              min_scores[i].insert({scores(j, i), j});
-            }
+          for (size_t j = 0; j < scores.num_rows(); ++j) {
+            min_scores[i].insert({scores(j, i), j + db.col_offset()});
           }
         });
-
-    bool done = true;
-    if (block_db) {
-      done = async_db ? !db.advance_wait() : !db.advance();
-    } else if (block_q) {
-      done = async_q ? !q.advance_wait() : !q.advance();
-    }
-    if (done) {
-      break;
-    }
   }
 
   ColMajorMatrix<size_t> top_k(k, q.num_cols());
-  for (int j = 0; j < min_scores.size(); ++j) {
+  for (size_t j = 0; j < size(min_scores); ++j) {
     // @todo get_top_k_from_heap
     std::sort_heap(min_scores[j].begin(), min_scores[j].end());
     std::transform(
@@ -125,11 +90,11 @@ auto blocked_gemm_query(DB& db, Q& q, int k, bool nth, size_t nthreads) {
 
 template <class DB, class Q>
 auto gemm_partition(const DB& db, const Q& q, unsigned nthreads) {
-  scoped_timer _{"Total time " + tdb_func__};
+  scoped_timer _{tdb_func__};
 
   auto scores = gemm_scores(db, q, nthreads);
 
-  auto top_k = std::vector<int>(q.num_cols());
+  auto top_k = std::vector<size_t>(q.num_cols());
   {
     for (int i = 0; i < scores.num_cols(); ++i) {
       auto min_score = std::numeric_limits<float>::max();
@@ -151,21 +116,15 @@ auto gemm_partition(const DB& db, const Q& q, unsigned nthreads) {
 
 template <class DB, class Q>
 auto blocked_gemm_partition(DB& db, Q& q, unsigned nthreads) {
-  scoped_timer _{"Total time " + tdb_func__};
-
-  const auto block_db = db.is_blocked();
-  const auto block_q = q.is_blocked();
-  if (block_db && block_q) {
-    throw std::runtime_error("Can't block both db and q");
-  }
+  scoped_timer _{tdb_func__};
 
   ColMajorMatrix<float> scores(db.num_cols(), q.num_cols());
   auto _score_data = raveled(scores);
   auto top_k = std::vector<int>(q.num_cols());
   auto min_scores =
       std::vector<float>(q.num_cols(), std::numeric_limits<float>::max());
 
-  for (;;) {
+  while (db.load()) {
     gemm_scores(db, q, scores, nthreads);
 
     for (int i = 0; i < scores.num_cols(); ++i) {
@@ -181,18 +140,9 @@ auto blocked_gemm_partition(DB& db, Q& q, unsigned nthreads) {
       }
       top_k[i] = idx;
     }
-    bool done = true;
-    if (block_db) {
-      done = !db.advance();
-    } else {
-      done = !q.advance();
-    }
-    if (done) {
-      break;
-    }
   }
   return top_k;
 }
 }  // namespace detail::flat
 
-#endif  // TILEDB_FLAT_GEMM_H
+#endif  // TILEDB_FLAT_GEMM_H
@@ -65,7 +65,7 @@ auto qv_query_nth(
     const DB& db, const Q& q, int k, bool nth, unsigned int nthreads) {
   scoped_timer _{tdb_func__};
 
-  ColMajorMatrix<uint64_t> top_k(k, q.num_cols());
+  ColMajorMatrix<size_t> top_k(k, q.num_cols());
 
   auto par = stdx::execution::indexed_parallel_policy{nthreads};
   stdx::range_for_each(
@@ -101,7 +101,7 @@ auto qv_query_heap(const DB& db, const Q& q, size_t k, unsigned nthreads) {
 
   using element = std::pair<float, int>;
 
-  ColMajorMatrix<uint64_t> top_k(k, q.num_cols());
+  ColMajorMatrix<size_t> top_k(k, q.num_cols());
 
   // Have to do explicit asynchronous threading here, as the current parallel
-  // algorithms have iterator-based interaces, and the `Matrix` class does not
+  // algorithms have iterator-based interfaces, and the `Matrix` class does not
@@ -156,7 +156,7 @@ auto qv_partition(const DB& db, const Q& q, unsigned nthreads) {
   scoped_timer _{tdb_func__};
 
   // Just need a single vector
-  std::vector<unsigned> top_k(q.num_cols());
+  std::vector<size_t> top_k(q.num_cols());
 
   // Again, doing the parallelization by hand here....
   size_t size_db = db.num_cols();
@@ -178,7 +178,7 @@ auto qv_partition(const DB& db, const Q& q, unsigned nthreads) {
               float min_score = std::numeric_limits<float>::max();
               size_t idx = 0;
 
-              for (int i = 0; i < size_db; ++i) {
+              for (size_t i = 0; i < size_db; ++i) {
                 auto score = L2(q[j], db[i]);
                 if (score < min_score) {
                   min_score = score;