Skip to content

Commit 2ed2004

Browse files
Log stats on array openings and query submissions. (#52)
* Add wrappers over opening arrays and submitting queries that optionally log stats.
* Use the wrapper functions throughout the codebase.
* Support collecting stats in the Python API and add functions to enable it.
* Log the name of the calling function alongside the stats.
* Support customizing the file the stats are written to, defaulting to stdout.
* Log Core stats in JSON and add specialized dump and reset Python API functions.
* Support logging stats in the CLI programs.
* Use `raw_dump` to get the stats. `dump` and `raw_dump` do the same thing, but only the latter is documented to return JSON.
1 parent 67b4623 commit 2ed2004

File tree

15 files changed

+262
-19
lines changed

15 files changed

+262
-19
lines changed

apis/python/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ target_link_libraries(${VSPY_TARGET_NAME}
4949
kmeans_lib
5050
TileDB::tiledb_shared)
5151

52-
target_compile_definitions(${VSPY_TARGET_NAME} PRIVATE VERSION_INFO=${PROJECT_VERSION})
52+
target_compile_definitions(${VSPY_TARGET_NAME} PRIVATE TILEDBVS_ENABLE_STATS VERSION_INFO=${PROJECT_VERSION})
5353

5454
if (APPLE)
5555
set_target_properties(${VSPY_TARGET_NAME} PROPERTIES INSTALL_RPATH "@loader_path/lib")

apis/python/src/tiledb/vector_search/module.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ using Ctx = tiledb::Context;
1414
bool global_debug = true;
1515
double global_time_of_interest;
1616

17+
bool enable_stats = false;
18+
std::vector<json> core_stats;
19+
1720
PYBIND11_MAKE_OPAQUE(std::vector<uint32_t>);
1821
PYBIND11_MAKE_OPAQUE(std::vector<uint64_t>);
1922

@@ -249,6 +252,24 @@ PYBIND11_MODULE(_tiledbvspy, m) {
249252
return validate_top_k(top_k, ground_truth);
250253
});
251254

255+
m.def("stats_enable", []() {
256+
enable_stats = true;
257+
tiledb::Stats::enable();
258+
});
259+
260+
m.def("stats_disable", []() {
261+
enable_stats = false;
262+
tiledb::Stats::disable();
263+
});
264+
265+
m.def("stats_reset", []() {
266+
core_stats.clear();
267+
});
268+
269+
m.def("stats_dump", []() {
270+
return json{core_stats}.dump();
271+
});
272+
252273
declare_kmeans_query<uint8_t>(m, "u8");
253274
declare_kmeans_query<float>(m, "f32");
254275

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/**
2+
* @file tdb_helpers.h
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2023 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* Helper functions for certain TileDB operations.
31+
*
32+
*/
33+
34+
#ifndef TILEDB_HELPERS_H
35+
#define TILEDB_HELPERS_H
36+
37+
#include "stats.h"
38+
#include <tiledb/tiledb>
39+
40+
namespace tiledb_helpers {
41+
42+
/**
43+
* @brief Opens a TileDB array and displays stats to stderr.
44+
*
45+
* Stats are only collected if the TILEDBVS_ENABLE_STATS symbol is
46+
* defined, and a variable named enable_stats is set to true.
47+
* The stats are written to a FILE* specified by the variable named stats_file.
48+
*
49+
* @param function_name The name of the function calling this. You can use the tdb_func__ macro.
50+
* @param ctx The TileDB context to use.
51+
* @param uri The URI of the array to open.
52+
* @param query_type The mode to open the array.
53+
*/
54+
inline tiledb::Array open_array(const std::string &function_name,
55+
const tiledb::Context &ctx,
56+
const std::string &uri,
57+
tiledb_query_type_t query_type) {
58+
StatsCollectionScope stats_scope(uri, function_name, "open_array");
59+
return tiledb::Array(ctx, uri, query_type);
60+
}
61+
62+
/**
63+
* @brief Submits a TileDB query and displays stats to stderr.
64+
*
65+
* Stats are only collected if the TILEDBVS_ENABLE_STATS symbol is
66+
* defined, and a variable named enable_stats is set to true.
67+
* The stats are written to a FILE* specified by the variable named stats_file.
68+
*
69+
* @param function_name The name of the function calling this. You can use the tdb_func__ macro.
70+
* @param query The query to submit.
71+
*/
72+
inline void submit_query(const std::string &function_name,
73+
const std::string &uri,
74+
tiledb::Query &query) {
75+
StatsCollectionScope stats_scope(uri, function_name, "submit_query");
76+
query.submit();
77+
}
78+
79+
} // namespace tiledb_helpers
80+
81+
#endif

src/include/detail/linalg/tdb_io.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void write_matrix(
8585
0, (int)A.num_rows() - 1, 0, (int)A.num_cols() - 1};
8686

8787
// Open array for writing
88-
tiledb::Array array(ctx, uri, TILEDB_WRITE);
88+
tiledb::Array array = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_WRITE);
8989

9090
tiledb::Subarray subarray(ctx, array);
9191
subarray.set_subarray(subarray_vals);
@@ -96,7 +96,7 @@ void write_matrix(
9696
.set_data_buffer(
9797
"values", &A(0, 0), (int)A.num_rows() * (int)A.num_cols())
9898
.set_subarray(subarray);
99-
query.submit();
99+
tiledb_helpers::submit_query(tdb_func__, uri, query);
100100

101101
array.close();
102102
}
@@ -132,7 +132,7 @@ void write_vector(
132132
std::vector<int32_t> subarray_vals{0, (int)size(v) - 1};
133133

134134
// Open array for writing
135-
tiledb::Array array(ctx, uri, TILEDB_WRITE);
135+
tiledb::Array array = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_WRITE);
136136

137137
tiledb::Subarray subarray(ctx, array);
138138
subarray.set_subarray(subarray_vals);
@@ -141,7 +141,7 @@ void write_vector(
141141
query.set_layout(TILEDB_ROW_MAJOR)
142142
.set_data_buffer("values", v)
143143
.set_subarray(subarray);
144-
query.submit();
144+
tiledb_helpers::submit_query(tdb_func__, uri, query);
145145

146146
array.close();
147147
}
@@ -157,7 +157,7 @@ std::vector<T> read_vector(const tiledb::Context& ctx, const std::string& uri) {
157157
std::cerr << "# Reading std::vector: " << uri << std::endl;
158158
}
159159

160-
auto array_ = tiledb::Array{ctx, uri, TILEDB_READ};
160+
tiledb::Array array_ = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ);
161161
auto schema_ = array_.schema();
162162

163163
using domain_type = int32_t;
@@ -189,7 +189,7 @@ std::vector<T> read_vector(const tiledb::Context& ctx, const std::string& uri) {
189189
tiledb::Query query(ctx, array_);
190190
query.set_subarray(subarray).set_data_buffer(
191191
attr_name, data_.data(), vec_rows_);
192-
query.submit();
192+
tiledb_helpers::submit_query(tdb_func__, uri, query);
193193
_memory_data.insert_entry(tdb_func__, vec_rows_ * sizeof(T));
194194

195195
array_.close();

src/include/detail/linalg/tdb_matrix.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
7272
log_timer constructor_timer{"tdbMatrix constructor"};
7373

7474
std::reference_wrapper<const tiledb::Context> ctx_;
75+
std::string uri_;
7576
tiledb::Array array_;
7677
tiledb::ArraySchema schema_;
7778
std::unique_ptr<T[]> backing_data_;
@@ -199,7 +200,8 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
199200
size_t col_begin,
200201
size_t col_end) // noexcept
201202
: ctx_{ctx}
202-
, array_{ctx, uri, TILEDB_READ}
203+
, uri_{uri}
204+
, array_{tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ)}
203205
, schema_{array_.schema()} {
204206
constructor_timer.stop();
205207
scoped_timer _{tdb_func__ + uri};
@@ -282,7 +284,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
282284
query.set_subarray(subarray)
283285
.set_layout(layout_order)
284286
.set_data_buffer(attr_name, data_.get(), num_rows * num_cols);
285-
query.submit();
287+
tiledb_helpers::submit_query(tdb_func__, uri, query);
286288
_memory_data.insert_entry(tdb_func__, num_rows * num_cols * sizeof(T));
287289

288290
// assert(tiledb::Query::Status::COMPLETE == query.query_status());
@@ -395,7 +397,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
395397
query.set_subarray(subarray)
396398
.set_layout(layout_order)
397399
.set_data_buffer(attr_name, ptr, num_elements);
398-
query.submit();
400+
tiledb_helpers::submit_query(tdb_func__, uri, query);
399401
_memory_data.insert_entry(tdb_func__, num_elements * sizeof(T));
400402

401403
// assert(tiledb::Query::Status::COMPLETE == query.query_status());
@@ -417,7 +419,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
417419
*/
418420
auto attr_idx = 0;
419421

420-
auto ids_array_ = tiledb::Array{ctx_, id_uri, TILEDB_READ};
422+
tiledb::Array ids_array_ = tiledb_helpers::open_array(tdb_func__, ctx_, id_uri, TILEDB_READ);
421423
auto ids_schema_ = ids_array_.schema();
422424

423425
auto attr_num{ids_schema_.attribute_num()};
@@ -449,7 +451,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
449451
auto ptr = part_ids.data() + offset;
450452
query.set_subarray(subarray).set_data_buffer(
451453
attr_name, ptr, num_elements);
452-
query.submit();
454+
tiledb_helpers::submit_query(tdb_func__, uri, query);
453455
_memory_data.insert_entry(tdb_func__, num_elements * sizeof(T));
454456

455457
if (tiledb::Query::Status::COMPLETE != query.query_status()) {
@@ -550,7 +552,7 @@ class tdbMatrix : public Matrix<T, LayoutPolicy, I> {
550552
query.set_subarray(subarray)
551553
.set_layout(layout_order)
552554
.set_data_buffer(attr_name, this_data, read_size);
553-
query.submit();
555+
tiledb_helpers::submit_query(tdb_func__, uri_, query);
554556
_memory_data.insert_entry(tdb_func__, read_size * sizeof(T));
555557

556558
// assert(tiledb::Query::Status::COMPLETE == query.query_status());

src/include/detail/linalg/tdb_partitioned_matrix.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
106106
log_timer constructor_timer{"tdbPartitionedMatrix constructor"};
107107

108108
std::reference_wrapper<const tiledb::Context> ctx_;
109+
std::string uri_;
109110
tiledb::Array array_;
110111
tiledb::ArraySchema schema_;
111112
std::unique_ptr<T[]> backing_data_;
@@ -178,9 +179,10 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
178179
size_t nthreads)
179180
: constructor_timer{tdb_func__ + std::string{" constructor"}}
180181
, ctx_{ctx}
181-
, array_{ctx_, uri, TILEDB_READ}
182+
, uri_{uri}
183+
, array_{tiledb_helpers::open_array(tdb_func__, ctx_, uri, TILEDB_READ)}
182184
, schema_{array_.schema()}
183-
, ids_array_{ctx_, ids_uri, TILEDB_READ}
185+
, ids_array_{tiledb_helpers::open_array(tdb_func__, ctx_, ids_uri, TILEDB_READ)}
184186
, ids_schema_{ids_array_.schema()}
185187
, indices_{std::move(in_indices)}
186188
, parts_{in_parts}
@@ -345,7 +347,7 @@ class tdbPartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
345347
query.set_subarray(subarray)
346348
.set_layout(layout_order)
347349
.set_data_buffer(attr_name, ptr, col_count * dimension);
348-
query.submit();
350+
tiledb_helpers::submit_query(tdb_func__, uri_, query);
349351
_memory_data.insert_entry(tdb_func__, col_count * dimension * sizeof(T));
350352

351353
// assert(tiledb::Query::Status::COMPLETE == query.query_status());

src/include/linalg.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "detail/linalg/choose_blas.h"
4747
#include "detail/linalg/linalg_defs.h"
4848
#include "detail/linalg/matrix.h"
49+
#include "detail/linalg/tdb_helpers.h"
4950
#include "detail/linalg/tdb_io.h"
5051
#include "detail/linalg/tdb_matrix.h"
5152
#include "detail/linalg/tdb_partitioned_matrix.h"

src/include/stats.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,47 @@
5353

5454
using json = nlohmann::json;
5555

56+
// Stats support is opt-in, so that projects which do not use it are not required to define the enable_stats and core_stats variables.
57+
#ifdef TILEDBVS_ENABLE_STATS
58+
extern bool enable_stats;
59+
extern std::vector<json> core_stats;
60+
#endif
5661

62+
/**
 * Scope guard that captures TileDB Core stats for a single operation.
 *
 * When the TILEDBVS_ENABLE_STATS symbol is defined and the global
 * enable_stats flag is true at construction, the constructor resets the
 * TileDB stats and the destructor appends one JSON record (with the keys
 * "uri", "function", "operation_type" and "stats") to the global core_stats
 * vector. In every other case the object does nothing.
 *
 * The guard is non-copyable so that one operation yields at most one record.
 */
class StatsCollectionScope final {
 public:
  /**
   * @param uri The URI recorded for the operation.
   * @param function The name of the calling function.
   * @param operation_type A label for the operation, e.g. "open_array".
   */
  explicit StatsCollectionScope(
      [[maybe_unused]] const std::string& uri,
      [[maybe_unused]] const std::string& function,
      [[maybe_unused]] const std::string& operation_type) {
#ifdef TILEDBVS_ENABLE_STATS
    if (!enable_stats) {
      return;
    }
    tiledb::Stats::reset();
    uri_ = uri;
    function_ = function;
    operation_type_ = operation_type;
    // Remember that this scope actually started a collection, so the
    // destructor never emits a record with empty fields if enable_stats is
    // switched on while the scope is alive.
    armed_ = true;
#endif
  }

  StatsCollectionScope(const StatsCollectionScope&) = delete;
  StatsCollectionScope& operator=(const StatsCollectionScope&) = delete;

  ~StatsCollectionScope() {
#ifdef TILEDBVS_ENABLE_STATS
    if (!armed_ || !enable_stats) {
      return;
    }
    // Destructors are implicitly noexcept, and this one can also run while
    // the stack unwinds from a failed operation. Parsing or allocation
    // failures must therefore not escape: stats are best-effort diagnostics
    // and must never terminate the program.
    try {
      std::string stats_str;
      tiledb::Stats::raw_dump(&stats_str);
      core_stats.push_back({
          {"uri", uri_},
          {"function", function_},
          {"operation_type", operation_type_},
          {"stats", json::parse(stats_str)}
      });
    } catch (...) {
      // Intentionally dropped: the record for this operation is lost.
    }
#endif
  }

#ifdef TILEDBVS_ENABLE_STATS
 private:
  std::string uri_, function_, operation_type_;
  bool armed_{false};
#endif
};
5797

5898
auto dump_logs = [](std::ostream& output,
5999
const std::string algorithm,

src/include/test/time_open.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11

22
#include <string>
33
#include <tiledb/tiledb>
4+
#include "detail/linalg/tdb_helpers.h"
45
#include "utils/timer.h"
56

67
void open_array(const std::string& uri) {
78
scoped_timer _{"open_array " + uri};
89

910
tiledb::Context ctx;
10-
tiledb::Array array(ctx, uri, TILEDB_READ);
11+
tiledb::Array array = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ);
1112

1213
scoped_timer _2{"get_schema portion"};
1314
tiledb::ArraySchema schema = array.schema();

src/include/test/unit_slicing.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ TEST_CASE("slice", "[linalg][ci-skip]") {
5050
std::vector<int> data2_(288);
5151
std::vector<float> value_(288);
5252

53-
tiledb::Array array_{ctx_, uri, TILEDB_READ};
53+
tiledb::Array array_ = tiledb_helpers::open_array(tdb_func__, ctx_, uri, TILEDB_READ);
5454
tiledb::ArraySchema schema_{array_.schema()};
5555
tiledb::Query query(ctx_, array_);
5656

@@ -66,7 +66,7 @@ TEST_CASE("slice", "[linalg][ci-skip]") {
6666
.set_data_buffer("rows", data_.data(), 288)
6767
.set_data_buffer("a", value_.data(), 288);
6868

69-
query.submit();
69+
tiledb_helpers::submit_query(tdb_func__, uri, query);
7070

7171
for (int i = 0; i < 135; i++) {
7272
std::cout << data_[i] << ", " << data2_[i] << ": " << value_[i]

0 commit comments

Comments
 (0)