Skip to content

Commit ed1aa78

Browse files
author
Nikos Papailiou
committed
Merge branch 'main' into npapa/cpp_ingestion
2 parents 9edde59 + 2ed2004 commit ed1aa78

File tree

19 files changed

+279
-35
lines changed

19 files changed

+279
-35
lines changed

.github/workflows/ci_python.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ jobs:
2828
run: |
2929
cd apis/python
3030
pip install .[test]
31-
pytest -k "not ingest" # TODO: requires token
31+
pytest
32+
# TODO: fix editable on linux
3233
#pip uninstall -y tiledb.vector_search
3334
#pip install -e .
34-
#pytest -k "not ingest" # TODO: requires token
35+
#pytest
3536
shell: bash -el {0}

README.md

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,7 @@
1414

1515
# Quick Installation
1616

17-
TileDB-Vector-Search is available from either [PyPI](https://pypi.org/project/tiledb-vector-search/) with ``pip``:
18-
19-
```
20-
pip install tiledb-vector-search
21-
```
22-
23-
or from [conda-forge](https://anaconda.org/conda-forge/tiledb-vector-searcg) with
17+
Pre-built packages are available from the [tiledb conda channel](https://anaconda.org/tiledb/tiledb-vector-search) using
2418
[conda](https://conda.io/docs/) or [mamba](https://github.com/mamba-org/mamba#installation):
2519

2620
```
@@ -33,4 +27,4 @@ We welcome contributions. Please see [`Building`](Building.md) for
3327
development-build instructions. For large new
3428
features, please open an issue to discuss goals and approach in order
3529
to ensure a smooth PR integration and review process. All contributions
36-
must be licensed under the repository's [MIT License](../LICENSE).
30+
must be licensed under the repository's [MIT License](../LICENSE).

apis/python/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ target_link_libraries(${VSPY_TARGET_NAME}
4949
kmeans_lib
5050
TileDB::tiledb_shared)
5151

52-
target_compile_definitions(${VSPY_TARGET_NAME} PRIVATE VERSION_INFO=${PROJECT_VERSION})
52+
target_compile_definitions(${VSPY_TARGET_NAME} PRIVATE TILEDBVS_ENABLE_STATS VERSION_INFO=${PROJECT_VERSION})
5353

5454
if (APPLE)
5555
set_target_properties(${VSPY_TARGET_NAME} PROPERTIES INSTALL_RPATH "@loader_path/lib")

apis/python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ classifiers = [
2020
dependencies = [
2121
"tiledb-cloud>=0.10.5",
2222
"tiledb>=0.15.2",
23+
"typing-extensions" # for tiledb-cloud indirect
2324
]
2425

2526
[project.optional-dependencies]
26-
test = ["pytest"]
27+
test = ["pytest", "scikit-learn", "tiledb-cloud"]
2728

2829

2930
[project.urls]

apis/python/src/tiledb/vector_search/module.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ using Ctx = tiledb::Context;
1414
bool global_debug = true;
1515
double global_time_of_interest;
1616

17+
bool enable_stats = false;
18+
std::vector<json> core_stats;
19+
1720
PYBIND11_MAKE_OPAQUE(std::vector<uint32_t>);
1821
PYBIND11_MAKE_OPAQUE(std::vector<uint64_t>);
1922

@@ -299,6 +302,24 @@ PYBIND11_MODULE(_tiledbvspy, m) {
299302
return validate_top_k(top_k, ground_truth);
300303
});
301304

305+
m.def("stats_enable", []() {
306+
enable_stats = true;
307+
tiledb::Stats::enable();
308+
});
309+
310+
m.def("stats_disable", []() {
311+
enable_stats = false;
312+
tiledb::Stats::disable();
313+
});
314+
315+
m.def("stats_reset", []() {
316+
core_stats.clear();
317+
});
318+
319+
m.def("stats_dump", []() {
320+
return json{core_stats}.dump();
321+
});
322+
302323
declare_kmeans_query<uint8_t>(m, "u8");
303324
declare_kmeans_query<float>(m, "f32");
304325

documentation/index.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,17 @@
1414

1515
# Quick Installation
1616

17-
TileDB-Vector-Search is available from either [PyPI](https://pypi.org/project/tiledb-vector-search/) with ``pip``:
17+
TileDB-Vector-Search is available from the [tiledb conda channel](https://anaconda.org/tiledb/tiledb-vector-search) with
18+
[conda](https://conda.io/docs/) or [mamba](https://github.com/mamba-org/mamba#installation)
1819

1920
```
20-
pip install tiledb-vector-search
21+
conda install -c tiledb tiledb-vector-search
2122
```
2223

23-
~~or from the [tiledb conda channel](https://anaconda.org/tiledb/tiledb-vector-search) with
24-
[conda](https://conda.io/docs/) or [mamba](https://github.com/mamba-org/mamba#installation)~~ (Pending 🚧):
24+
~~Or [PyPI](https://pypi.org/project/tiledb-vector-search/) with ``pip``~~: (Pending 🚧)
2525

2626
```
27-
conda install -c tiledb tiledb-vector-search
27+
pip install tiledb-vector-search
2828
```
2929

3030
# Contributing
@@ -33,4 +33,4 @@ We welcome contributions. Please see [`Building`](Building.md) for
3333
development-build instructions. For large new
3434
features, please open an issue to discuss goals and approach in order
3535
to ensure a smooth PR integration and review process. All contributions
36-
must be licensed under the repository's [MIT License](../LICENSE).
36+
must be licensed under the repository's [MIT License](../LICENSE).
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/**
2+
* @file tiledb_helpers.h
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2023 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* Helper functions for certain TileDB operations.
31+
*
32+
*/
33+
34+
#ifndef TILEDB_HELPERS_H
35+
#define TILEDB_HELPERS_H
36+
37+
#include "stats.h"
38+
#include <tiledb/tiledb>
39+
40+
namespace tiledb_helpers {
41+
42+
/**
43+
* @brief Opens a TileDB array and displays stats to stderr.
44+
*
45+
* Stats are only collected if the TILEDBVS_ENABLE_STATS symbol is
46+
* defined, and a variable named enable_stats is set to true.
47+
* The stats are written to a FILE* specified by the variable named stats_file.
48+
*
49+
* @param function_name The name of the function calling this. You can use the tdb_func__ macro.
50+
* @param ctx The TileDB context to use.
51+
* @param uri The URI of the array to open.
52+
* @param query_type The mode to open the array.
53+
*/
54+
inline tiledb::Array open_array(const std::string &function_name,
                                const tiledb::Context &ctx,
                                const std::string &uri,
                                tiledb_query_type_t query_type) {
  // The scope object is constructed before the array is opened and is
  // destroyed only when this function returns, so its lifetime brackets the
  // whole open. It is tagged with the array URI, the caller's name, and the
  // fixed operation label "open_array".
  // NOTE(review): what the scope records is defined in stats.h -- confirm there.
  StatsCollectionScope open_scope(uri, function_name, "open_array");

  // Build the array directly in the return statement so the caller receives
  // the freshly opened handle without an intermediate named object.
  return tiledb::Array{ctx, uri, query_type};
}
61+
62+
/**
63+
* @brief Submits a TileDB query and displays stats to stderr.
64+
*
65+
* Stats are only collected if the TILEDBVS_ENABLE_STATS symbol is
66+
* defined, and a variable named enable_stats is set to true.
67+
* The stats are written to a FILE* specified by the variable named stats_file.
68+
*
69+
* @param function_name The name of the function calling this. You can use the tdb_func__ macro.
70+
 * @param uri The URI of the array the query targets; it is passed to the stats scope.
 * @param query The query to submit.
71+
*/
72+
inline void submit_query(const std::string &function_name,
73+
const std::string &uri,
74+
tiledb::Query &query) {
75+
StatsCollectionScope stats_scope(uri, function_name, "submit_query");
76+
query.submit();
77+
}
78+
79+
} // namespace tiledb_helpers
80+
81+
#endif

src/include/detail/linalg/tdb_io.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ void write_matrix(
100100
(int)start_pos + (int)A.num_cols() - 1};
101101

102102
// Open array for writing
103-
tiledb::Array array(ctx, uri, TILEDB_WRITE);
103+
tiledb::Array array =
104+
tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_WRITE);
104105

105106
tiledb::Subarray subarray(ctx, array);
106107
subarray.set_subarray(subarray_vals);
@@ -113,7 +114,7 @@ void write_matrix(
113114
.set_data_buffer(
114115
"values", &A(0, 0), (uint64_t)A.num_rows() * (uint64_t)A.num_cols())
115116
.set_subarray(subarray);
116-
query.submit();
117+
tiledb_helpers::submit_query(tdb_func__, uri, query);
117118

118119
assert(tiledb::Query::Status::COMPLETE == query.query_status());
119120

@@ -167,7 +168,8 @@ void write_vector(
167168
(int)start_pos, (int)start_pos + (int)size(v) - 1};
168169

169170
// Open array for writing
170-
tiledb::Array array(ctx, uri, TILEDB_WRITE);
171+
tiledb::Array array =
172+
tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_WRITE);
171173

172174
tiledb::Subarray subarray(ctx, array);
173175
subarray.set_subarray(subarray_vals);
@@ -176,8 +178,10 @@ void write_vector(
176178
query.set_layout(TILEDB_ROW_MAJOR)
177179
.set_data_buffer("values", v)
178180
.set_subarray(subarray);
181+
179182
query.submit();
180183
assert(tiledb::Query::Status::COMPLETE == query.query_status());
184+
tiledb_helpers::submit_query(tdb_func__, uri, query);
181185

182186
array.close();
183187
}
@@ -193,7 +197,8 @@ std::vector<T> read_vector(const tiledb::Context& ctx, const std::string& uri) {
193197
std::cerr << "# Reading std::vector: " << uri << std::endl;
194198
}
195199

196-
auto array_ = tiledb::Array{ctx, uri, TILEDB_READ};
200+
tiledb::Array array_ =
201+
tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ);
197202
auto schema_ = array_.schema();
198203

199204
using domain_type = int32_t;
@@ -225,7 +230,7 @@ std::vector<T> read_vector(const tiledb::Context& ctx, const std::string& uri) {
225230
tiledb::Query query(ctx, array_);
226231
query.set_subarray(subarray).set_data_buffer(
227232
attr_name, data_.data(), vec_rows_);
228-
query.submit();
233+
tiledb_helpers::submit_query(tdb_func__, uri, query);
229234
_memory_data.insert_entry(tdb_func__, vec_rows_ * sizeof(T));
230235

231236
array_.close();

src/include/detail/linalg/tdb_matrix.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
7272
log_timer constructor_timer{"tdbMatrix constructor"};
7373

7474
std::reference_wrapper<const tiledb::Context> ctx_;
75+
std::string uri_;
7576
tiledb::Array array_;
7677
tiledb::ArraySchema schema_;
7778
std::unique_ptr<T[]> backing_data_;
@@ -195,7 +196,8 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
195196
size_t col_begin,
196197
size_t col_end) // noexcept
197198
: ctx_{ctx}
198-
, array_{ctx, uri, TILEDB_READ}
199+
, uri_{uri}
200+
, array_{tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ)}
199201
, schema_{array_.schema()} {
200202
constructor_timer.stop();
201203
scoped_timer _{tdb_func__ + uri};
@@ -278,7 +280,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
278280
query.set_subarray(subarray)
279281
.set_layout(layout_order)
280282
.set_data_buffer(attr_name, data_.get(), num_rows * num_cols);
281-
query.submit();
283+
tiledb_helpers::submit_query(tdb_func__, uri, query);
282284
_memory_data.insert_entry(tdb_func__, num_rows * num_cols * sizeof(T));
283285

284286
// assert(tiledb::Query::Status::COMPLETE == query.query_status());
@@ -391,7 +393,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
391393
query.set_subarray(subarray)
392394
.set_layout(layout_order)
393395
.set_data_buffer(attr_name, ptr, num_elements);
394-
query.submit();
396+
tiledb_helpers::submit_query(tdb_func__, uri, query);
395397
_memory_data.insert_entry(tdb_func__, num_elements * sizeof(T));
396398

397399
// assert(tiledb::Query::Status::COMPLETE == query.query_status());
@@ -413,7 +415,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
413415
*/
414416
auto attr_idx = 0;
415417

416-
auto ids_array_ = tiledb::Array{ctx_, id_uri, TILEDB_READ};
418+
tiledb::Array ids_array_ = tiledb_helpers::open_array(tdb_func__, ctx_, id_uri, TILEDB_READ);
417419
auto ids_schema_ = ids_array_.schema();
418420

419421
auto attr_num{ids_schema_.attribute_num()};
@@ -445,7 +447,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
445447
auto ptr = part_ids.data() + offset;
446448
query.set_subarray(subarray).set_data_buffer(
447449
attr_name, ptr, num_elements);
448-
query.submit();
450+
tiledb_helpers::submit_query(tdb_func__, uri, query);
449451
_memory_data.insert_entry(tdb_func__, num_elements * sizeof(T));
450452

451453
if (tiledb::Query::Status::COMPLETE != query.query_status()) {
@@ -552,7 +554,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
552554
query.set_subarray(subarray)
553555
.set_layout(layout_order)
554556
.set_data_buffer(attr_name, this_data, read_size);
555-
query.submit();
557+
tiledb_helpers::submit_query(tdb_func__, uri_, query);
556558
_memory_data.insert_entry(tdb_func__, read_size * sizeof(T));
557559

558560
// assert(tiledb::Query::Status::COMPLETE == query.query_status());

src/include/detail/linalg/tdb_partitioned_matrix.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
106106
log_timer constructor_timer{"tdbPartitionedMatrix constructor"};
107107

108108
std::reference_wrapper<const tiledb::Context> ctx_;
109+
std::string uri_;
109110
tiledb::Array array_;
110111
tiledb::ArraySchema schema_;
111112
std::unique_ptr<T[]> backing_data_;
@@ -178,9 +179,10 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
178179
size_t nthreads)
179180
: constructor_timer{tdb_func__ + std::string{" constructor"}}
180181
, ctx_{ctx}
181-
, array_{ctx_, uri, TILEDB_READ}
182+
, uri_{uri}
183+
, array_{tiledb_helpers::open_array(tdb_func__, ctx_, uri, TILEDB_READ)}
182184
, schema_{array_.schema()}
183-
, ids_array_{ctx_, ids_uri, TILEDB_READ}
185+
, ids_array_{tiledb_helpers::open_array(tdb_func__, ctx_, ids_uri, TILEDB_READ)}
184186
, ids_schema_{ids_array_.schema()}
185187
, indices_{std::move(in_indices)}
186188
, parts_{in_parts}
@@ -345,7 +347,7 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
345347
query.set_subarray(subarray)
346348
.set_layout(layout_order)
347349
.set_data_buffer(attr_name, ptr, col_count * dimension);
348-
query.submit();
350+
tiledb_helpers::submit_query(tdb_func__, uri_, query);
349351
_memory_data.insert_entry(tdb_func__, col_count * dimension * sizeof(T));
350352

351353
// assert(tiledb::Query::Status::COMPLETE == query.query_status());

0 commit comments

Comments
 (0)