Skip to content

Commit c2af64c

Browse files
authored
Change query plan capnp to use structured data (#4611)
This PR changes the CAPNP format not to serialize the JSON dump of the Query Plan, but to serialize just the required info from the Cloud server (only `Query strategy` for now). It also fixes a couple of issues found during testing, the most ambiguous being: - Part of the information Query Plan prints to the user is `VFS.Backend` . For local queries that’s trivial to get from parsing the array URI. For remote ones, it’s feasible again to get it from URI if it’s of the form `tiledb://<namespace>/<backend>://<bucket>/<array_name>` but not if it’s in form `tiledb://<namespace>/<array-name>`. To address this for now I am printing `Unknown` as `VFS.Backend` for such array URIs but I am open to better ideas. example QueryPlan output: ``` "TileDB Query Plan": { "Array.Type": "sparse", "Array.URI": "tiledb://unit/s3://tiledb-ypatia/scratch/aov2w20", "Query.Attributes": [ "value" ], "Query.Dimensions": [ "__tiledb_rows" ], "Query.Layout": "global-order", "Query.Strategy.Name": "SparseGlobalOrderReader", "VFS.Backend": "s3" } ``` --- TYPE: NO_HISTORY DESC: Change query plan capnp to use structured data
1 parent 31bb094 commit c2af64c

File tree

13 files changed

+676
-922
lines changed

13 files changed

+676
-922
lines changed

test/src/unit-request-handlers.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ struct HandleQueryPlanRequestFx : RequestHandlerFx {
8585
}
8686

8787
virtual shared_ptr<ArraySchema> create_schema() override;
88-
std::string call_handler(SerializationType stype, Query& query);
88+
QueryPlan call_handler(SerializationType stype, Query& query);
8989
};
9090

9191
struct HandleConsolidationPlanRequestFx : RequestHandlerFx {
@@ -229,7 +229,7 @@ TEST_CASE_METHOD(
229229
auto query_plan_ser_deser = call_handler(stype, *query->query_);
230230

231231
// Compare the two query plans
232-
REQUIRE(query_plan->view() == query_plan_ser_deser);
232+
REQUIRE(query_plan->view() == query_plan_ser_deser.dump_json());
233233

234234
// Clean up
235235
REQUIRE(tiledb_array_close(ctx, array) == TILEDB_OK);
@@ -466,7 +466,7 @@ shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {
466466
return schema;
467467
}
468468

469-
std::string HandleQueryPlanRequestFx::call_handler(
469+
QueryPlan HandleQueryPlanRequestFx::call_handler(
470470
SerializationType stype, Query& query) {
471471
auto ctx = tiledb::Context();
472472
auto array = tiledb::Array(ctx, uri_.to_string(), TILEDB_READ);
@@ -483,8 +483,8 @@ std::string HandleQueryPlanRequestFx::call_handler(
483483
resp_buf);
484484
REQUIRE(rval == TILEDB_OK);
485485

486-
auto query_plan =
487-
serialization::deserialize_query_plan_response(stype, resp_buf->buffer());
486+
auto query_plan = serialization::deserialize_query_plan_response(
487+
query, stype, resp_buf->buffer());
488488

489489
tiledb_buffer_handle_t::break_handle(req_buf);
490490
tiledb_buffer_handle_t::break_handle(resp_buf);

tiledb/api/c_api/query_plan/query_plan_api.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ capi_return_t tiledb_query_get_plan(
4848
throw CAPIStatusException("argument `query` may not be nullptr");
4949
}
5050

51+
if ((*query->query_).array()->is_remote()) {
52+
throw std::logic_error(
53+
"Failed to create a query plan; Remote arrays"
54+
"are not currently supported.");
55+
}
56+
5157
sm::QueryPlan plan(*query->query_);
5258

5359
*rv = tiledb_string_handle_t::make_handle(plan.dump_json());

tiledb/sm/c_api/tiledb.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4347,7 +4347,7 @@ capi_return_t tiledb_handle_query_plan_request(
43474347
const tiledb_buffer_t* request,
43484348
tiledb_buffer_t* response) {
43494349
if (sanity_check(ctx, array) == TILEDB_ERR) {
4350-
throw std::invalid_argument("Array paramter must be valid.");
4350+
throw std::invalid_argument("Array parameter must be valid.");
43514351
}
43524352

43534353
api::ensure_buffer_is_valid(request);
@@ -4362,7 +4362,7 @@ capi_return_t tiledb_handle_query_plan_request(
43624362
sm::QueryPlan plan(query);
43634363

43644364
tiledb::sm::serialization::serialize_query_plan_response(
4365-
plan.dump_json(),
4365+
plan,
43664366
static_cast<tiledb::sm::SerializationType>(serialization_type),
43674367
response->buffer());
43684368

tiledb/sm/filesystem/uri.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,17 @@ std::string URI::to_path(const std::string& uri) {
326326
}
327327

328328
std::string URI::backend_name() const {
329-
return uri_.substr(0, uri_.find_first_of(':'));
329+
if (is_tiledb(uri_)) {
330+
std::string array_ns, array_uri;
331+
throw_if_not_ok(URI(uri_).get_rest_components(&array_ns, &array_uri));
332+
auto prefix = array_uri.substr(0, array_uri.find_first_of(':'));
333+
if (prefix == array_uri) { // no `:` separator found in URI
334+
prefix = "Unknown";
335+
}
336+
return prefix;
337+
} else {
338+
return uri_.substr(0, uri_.find_first_of(':'));
339+
}
330340
}
331341

332342
std::string URI::to_path() const {

tiledb/sm/query_plan/query_plan.cc

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,18 @@ namespace sm {
4848
/* ********************************* */
4949
/* CONSTRUCTORS & DESTRUCTORS */
5050
/* ********************************* */
51-
QueryPlan::QueryPlan(Query& query) {
52-
if (query.array()->is_remote()) {
53-
throw std::logic_error(
54-
"Failed to create a query plan; Remote arrays"
55-
"are not currently supported.");
56-
}
57-
51+
QueryPlan::QueryPlan(Query& query, std::string strategy) {
5852
array_uri_ = query.array()->array_uri().to_string();
5953
vfs_backend_ = URI(array_uri_).backend_name();
6054
query_layout_ = query.layout();
6155

62-
// This most probably ends up creating the strategy on the query
63-
auto strategy_ptr = query.strategy();
64-
strategy_name_ = strategy_ptr->name();
56+
if (strategy.empty()) {
57+
// This most probably ends up creating the strategy on the query
58+
auto strategy_ptr = query.strategy();
59+
strategy_name_ = strategy_ptr->name();
60+
} else {
61+
strategy_name_ = strategy;
62+
}
6563

6664
array_type_ = query.array()->array_schema_latest().array_type();
6765

tiledb/sm/query_plan/query_plan.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ class QueryPlan {
6565
* Constructor
6666
*
6767
* @param query A query object for which we want to calculate the plan
68+
* @param strategy The strategy of the query, required only for remote queries
6869
*/
69-
QueryPlan(Query& query);
70+
QueryPlan(Query& query, const std::string strategy = "");
7071

7172
/* ****************************** */
7273
/* API */
@@ -81,6 +82,13 @@ class QueryPlan {
8182
*/
8283
std::string dump_json(uint32_t indent = 4);
8384

85+
/*
86+
* Get the strategy name stored in the query plan
87+
*/
88+
inline std::string strategy() const {
89+
return strategy_name_;
90+
}
91+
8492
private:
8593
/* ****************************** */
8694
/* PRIVATE ATTRIBUTES */

tiledb/sm/serialization/query.cc

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,7 +1613,8 @@ Status query_from_capnp(
16131613
void* buffer_start,
16141614
CopyState* const copy_state,
16151615
Query* const query,
1616-
ThreadPool* compute_tp) {
1616+
ThreadPool* compute_tp,
1617+
const bool query_plan) {
16171618
using namespace tiledb::sm;
16181619

16191620
auto array = query->array();
@@ -2114,12 +2115,16 @@ Status query_from_capnp(
21142115
attr_state->validity_len_data.swap(validity_buff);
21152116

21162117
throw_if_not_ok(query->set_data_buffer(
2117-
name, varlen_data, &attr_state->var_len_size, true, true));
2118+
name, varlen_data, &attr_state->var_len_size, !query_plan, true));
21182119
throw_if_not_ok(query->set_offsets_buffer(
2119-
name, offsets, &attr_state->fixed_len_size, true, true));
2120+
name, offsets, &attr_state->fixed_len_size, !query_plan, true));
21202121
if (nullable) {
21212122
throw_if_not_ok(query->set_validity_buffer(
2122-
name, validity, &attr_state->validity_len_size, true, true));
2123+
name,
2124+
validity,
2125+
&attr_state->validity_len_size,
2126+
!query_plan,
2127+
true));
21232128
}
21242129
} else {
21252130
auto* data = attribute_buffer_start;
@@ -2142,10 +2147,14 @@ Status query_from_capnp(
21422147
attr_state->validity_len_data.swap(validity_buff);
21432148

21442149
throw_if_not_ok(query->set_data_buffer(
2145-
name, data, &attr_state->fixed_len_size, true, true));
2150+
name, data, &attr_state->fixed_len_size, !query_plan, true));
21462151
if (nullable) {
21472152
throw_if_not_ok(query->set_validity_buffer(
2148-
name, validity, &attr_state->validity_len_size, true, true));
2153+
name,
2154+
validity,
2155+
&attr_state->validity_len_size,
2156+
!query_plan,
2157+
true));
21492158
}
21502159
}
21512160
} else {

tiledb/sm/serialization/query.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ Status query_from_capnp(
275275
void* buffer_start,
276276
CopyState* const copy_state,
277277
Query* const query,
278-
ThreadPool* compute_tp);
278+
ThreadPool* compute_tp,
279+
const bool query_plan = false);
279280

280281
#endif
281282

tiledb/sm/serialization/query_plan.cc

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -75,23 +75,24 @@ void query_plan_request_from_capnp(
7575
nullptr,
7676
nullptr,
7777
&query,
78-
&compute_tp));
78+
&compute_tp,
79+
true));
7980
}
8081
}
8182

8283
void query_plan_response_to_capnp(
83-
capnp::QueryPlanResponse::Builder& builder, const std::string& query_plan) {
84-
builder.setQueryPlan(query_plan);
84+
capnp::QueryPlanResponse::Builder& builder, const QueryPlan& query_plan) {
85+
builder.setStrategy(query_plan.strategy());
8586
}
8687

8788
std::string query_plan_response_from_capnp(
8889
const capnp::QueryPlanResponse::Reader& reader) {
89-
std::string query_plan;
90-
if (reader.hasQueryPlan()) {
91-
query_plan = reader.getQueryPlan().cStr();
90+
std::string strategy;
91+
if (reader.hasStrategy()) {
92+
strategy = reader.getStrategy().cStr();
9293
}
9394

94-
return query_plan;
95+
return strategy;
9596
}
9697

9798
void serialize_query_plan_request(
@@ -193,7 +194,7 @@ void deserialize_query_plan_request(
193194
}
194195

195196
void serialize_query_plan_response(
196-
const std::string& query_plan,
197+
const QueryPlan& query_plan,
197198
SerializationType serialization_type,
198199
Buffer& response) {
199200
try {
@@ -243,8 +244,10 @@ void serialize_query_plan_response(
243244
}
244245
}
245246

246-
std::string deserialize_query_plan_response(
247-
SerializationType serialization_type, const Buffer& response) {
247+
QueryPlan deserialize_query_plan_response(
248+
Query& query,
249+
SerializationType serialization_type,
250+
const Buffer& response) {
248251
try {
249252
switch (serialization_type) {
250253
case SerializationType::JSON: {
@@ -255,7 +258,7 @@ std::string deserialize_query_plan_response(
255258
json.decode(
256259
kj::StringPtr(static_cast<const char*>(response.data())), builder);
257260
capnp::QueryPlanResponse::Reader reader = builder.asReader();
258-
return query_plan_response_from_capnp(reader);
261+
return QueryPlan(query, query_plan_response_from_capnp(reader));
259262
}
260263
case SerializationType::CAPNP: {
261264
const auto mBytes = reinterpret_cast<const kj::byte*>(response.data());
@@ -264,7 +267,7 @@ std::string deserialize_query_plan_response(
264267
response.size() / sizeof(::capnp::word)));
265268
capnp::QueryPlanResponse::Reader reader =
266269
array_reader.getRoot<capnp::QueryPlanResponse>();
267-
return query_plan_response_from_capnp(reader);
270+
return QueryPlan(query, query_plan_response_from_capnp(reader));
268271
}
269272
default: {
270273
throw Status_SerializationError(
@@ -298,13 +301,13 @@ void deserialize_query_plan_request(
298301
}
299302

300303
void serialize_query_plan_response(
301-
const std::string&, const SerializationType, Buffer&) {
304+
const QueryPlan&, const SerializationType, Buffer&) {
302305
throw Status_SerializationError(
303306
"Cannot serialize; serialization not enabled.");
304307
}
305308

306-
std::string deserialize_query_plan_response(
307-
const SerializationType, const Buffer&) {
309+
QueryPlan deserialize_query_plan_response(
310+
Query&, const SerializationType, const Buffer&) {
308311
throw Status_SerializationError(
309312
"Cannot serialize; serialization not enabled.");
310313
}

tiledb/sm/serialization/query_plan.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,22 @@ void deserialize_query_plan_request(
8484
* @param response Buffer to store serialized bytes in.
8585
*/
8686
void serialize_query_plan_response(
87-
const std::string& query_plan,
87+
const QueryPlan& query_plan,
8888
const SerializationType serialization_type,
8989
Buffer& response);
9090

9191
/**
92-
* Deserialize a Query Plan request to cap'n proto object
92+
* Deserialize a Query Plan response from cap'n proto object
9393
*
94+
* @param query The query the plan is requested for.
9495
* @param serialization_type Format to serialize from: Cap'n Proto or JSON.
9596
* @param response Buffer to read serialized bytes from.
96-
* @return The requested query plan as a string.
97+
* @return The requested query plan.
9798
*/
98-
std::string deserialize_query_plan_response(
99-
const SerializationType serialization_type, const Buffer& response);
99+
QueryPlan deserialize_query_plan_response(
100+
Query& query,
101+
const SerializationType serialization_type,
102+
const Buffer& response);
100103

101104
} // namespace serialization
102105
} // namespace tiledb::sm

0 commit comments

Comments
 (0)