diff --git a/cmake/oxidize.cmake b/cmake/oxidize.cmake index b381d2c22f8..cb494875ce5 100644 --- a/cmake/oxidize.cmake +++ b/cmake/oxidize.cmake @@ -152,12 +152,9 @@ if (TILEDB_RUST) set(CARGO_INSTALL_ROOT ${CMAKE_BINARY_DIR}/cargo/install) set(CARGO_INSTALL_BIN ${CARGO_INSTALL_ROOT}/bin) - # pin version of cxxbridge due to https://github.com/dtolnay/cxx/issues/1436 build errors on MacOS - set(CXXBRIDGE_VERSION 1.0.138) - execute_process( COMMAND - ${CARGO} install cxxbridge-cmd --version ${CXXBRIDGE_VERSION} --root ${CARGO_INSTALL_ROOT} + ${CARGO} install cxxbridge-cmd --root ${CARGO_INSTALL_ROOT} ) execute_process( COMMAND diff --git a/examples/c_api/query_add_predicate.c b/examples/c_api/query_add_predicate.c new file mode 100644 index 00000000000..554c2d1f876 --- /dev/null +++ b/examples/c_api/query_add_predicate.c @@ -0,0 +1,509 @@ +/** + * @file query_add_predicate.c + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2022 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This example demonstrates using the experimental `tiledb_query_add_predicate` + * API to add one or more text predicates to a query. This API parses a SQL + * predicate and uses it to filter results inside of the storage engine + * before returning them to the user. + * + * The array used in this example is identical to that of the + * `query_condition_sparse` example. The first group of predicates which + * run are text equivalents of the predicates in that example, and produce + * the same results. + * + * This example also has additional queries which use predicates which + * combine dimensions and attributes, highlighting a capability which + * cannot be replicated by just subarrays and query conditions. + */ + +#include +#include +#include +#include +#include +#include +#include + +// Name of array. +const char* array_name = "array_query_add_predicate"; + +#define TRY(ctx, action) \ + do { \ + const capi_return_t r = (action); \ + if (r != TILEDB_OK) { \ + return print_last_error((ctx), r); \ + } \ + } while (0) + +#define RETURN_IF_NOT_OK(r) \ + do { \ + const int32_t status = (r); \ + if (status != TILEDB_OK) { \ + return status; \ + } \ + } while (0) + +/** + * Enumeration variants + */ +static const char* const states[] = { + "alabama", + "alaska", + "arizona", + "arkansas", + "california", + "colorado", + "connecticut", + "etc"}; + +/** + * @brief Function to print the values of all the attributes for one + * index of this array. + * + * @param a Attribute a's value. + * @param b Attribute b's value. + * @param c Attribute c's value. + * @param d Attribute d's value. 
+ */ +void print_elem( + int* a, char* b_start, int b_len, int32_t c, float d, uint8_t* e) { + char print_a[8], print_e[32]; + if (a == NULL) { + strcpy(&print_a[0], "null"); + } else { + sprintf(&print_a[0], "%d", *a); + } + if (e == NULL) { + strcpy(&print_e[0], "null"); + } else if (*e < sizeof(states) / sizeof(const char*)) { + strcpy(&print_e[0], states[*e]); + } else { + sprintf(&print_e[0], "(invalid key %hhu)", *e); + } + + printf("{%s, %.*s, %d, %.1f, %s}\n", print_a, b_len, b_start, c, d, print_e); +} + +/** + * Retrieve and print the last error. + * + * @param ctx The context object to get the error from. + */ +int32_t print_last_error(tiledb_ctx_t* ctx, int32_t rc) { + tiledb_error_t* err = NULL; + tiledb_ctx_get_last_error(ctx, &err); + if (err == NULL) { + fprintf(stderr, "TileDB Error: Error code returned but no error found."); + return rc; + } + const char* msg = NULL; + tiledb_error_message(err, &msg); + if (msg == NULL) { + fprintf(stderr, "TileDB Error"); + } else { + fprintf(stderr, "%s\n", msg); + } + return rc; +} + +/** + * @brief Function to create the TileDB array used in this example. + * The array will be 1D with size 1 with dimension "index". + * The bounds on the index will be 0 through 9, inclusive. + * + * The array has five attributes. The five attributes are + * - "a" (type int, nullable) + * - "b" (type ASCII string) + * - "c" (type int32_t) + * - "d" (type float) + * - "e" (type uint8_t, nullable, enumeration "us_states") + * @param ctx The context. + */ +int32_t create_array(tiledb_ctx_t* ctx) { + // Creating the dimension and the domain. + tiledb_dimension_t* dimension; + int dim_domain[] = {0, 9}; + int tile_extents[] = {1}; + TRY(ctx, + tiledb_dimension_alloc( + ctx, + "index", + TILEDB_INT32, + &dim_domain[0], + &tile_extents[0], + &dimension)); + + tiledb_domain_t* domain; + TRY(ctx, tiledb_domain_alloc(ctx, &domain)); + TRY(ctx, tiledb_domain_add_dimension(ctx, domain, dimension)); + + // The array will be sparse. 
+ tiledb_array_schema_t* schema; + TRY(ctx, tiledb_array_schema_alloc(ctx, TILEDB_SPARSE, &schema)); + TRY(ctx, tiledb_array_schema_set_domain(ctx, schema, domain)); + TRY(ctx, tiledb_array_schema_set_cell_order(ctx, schema, TILEDB_ROW_MAJOR)); + + // Create enumeration + size_t states_size = 0; + for (uint64_t i = 0; i < sizeof(states) / sizeof(const char*); i++) { + states_size += strlen(states[i]); + } + const uint64_t states_offsets_size = + (sizeof(states) / sizeof(const char*)) * sizeof(uint64_t); + + char* states_values = (char*)(malloc(states_size)); + uint64_t* states_offsets = (uint64_t*)(malloc(states_offsets_size)); + + states_size = 0; + for (uint64_t i = 0; i < sizeof(states) / sizeof(const char*); i++) { + const uint64_t slen = strlen(states[i]); + memcpy(&states_values[states_size], &states[i][0], slen); + states_offsets[i] = states_size; + states_size += slen; + } + tiledb_enumeration_t* enumeration_states; + TRY(ctx, + tiledb_enumeration_alloc( + ctx, + "us_states", + TILEDB_STRING_ASCII, + UINT32_MAX, + false, + states_values, + states_size, + states_offsets, + states_offsets_size, + &enumeration_states)); + free(states_offsets); + free(states_values); + + TRY(ctx, + tiledb_array_schema_add_enumeration(ctx, schema, enumeration_states)); + + // Adding the attributes of the array to the array schema. 
+ tiledb_attribute_t* a; + TRY(ctx, tiledb_attribute_alloc(ctx, "a", TILEDB_INT32, &a)); + TRY(ctx, tiledb_attribute_set_nullable(ctx, a, true)); + + tiledb_attribute_t* b; + TRY(ctx, tiledb_attribute_alloc(ctx, "b", TILEDB_STRING_ASCII, &b)); + TRY(ctx, tiledb_attribute_set_cell_val_num(ctx, b, TILEDB_VAR_NUM)); + + tiledb_attribute_t* c; + TRY(ctx, tiledb_attribute_alloc(ctx, "c", TILEDB_INT32, &c)); + + tiledb_attribute_t* d; + TRY(ctx, tiledb_attribute_alloc(ctx, "d", TILEDB_FLOAT32, &d)); + + tiledb_attribute_t* e; + TRY(ctx, tiledb_attribute_alloc(ctx, "e", TILEDB_UINT8, &e)); + TRY(ctx, tiledb_attribute_set_nullable(ctx, e, true)); + TRY(ctx, tiledb_attribute_set_enumeration_name(ctx, e, "us_states")); + + TRY(ctx, tiledb_array_schema_add_attribute(ctx, schema, a)); + TRY(ctx, tiledb_array_schema_add_attribute(ctx, schema, b)); + TRY(ctx, tiledb_array_schema_add_attribute(ctx, schema, c)); + TRY(ctx, tiledb_array_schema_add_attribute(ctx, schema, d)); + TRY(ctx, tiledb_array_schema_add_attribute(ctx, schema, e)); + + // Create the (empty) array. + TRY(ctx, tiledb_array_create(ctx, array_name, schema)); + + // Cleanup. NOTE(review): `enumeration_states` is not released here; confirm whether `tiledb_enumeration_free` should be called. + tiledb_attribute_free(&e); + tiledb_attribute_free(&d); + tiledb_attribute_free(&c); + tiledb_attribute_free(&b); + tiledb_attribute_free(&a); + tiledb_array_schema_free(&schema); + tiledb_domain_free(&domain); + tiledb_dimension_free(&dimension); + + return TILEDB_OK; +} + +/** + * @brief Execute a write on the array_query_add_predicate array + * which then stores the following data in the array. The table + * is organized by dimension/attribute. 
+ * + * index | a | b | c | d | e + * ------+------+-------+---+-----+------------ + * 0 | null | alice | 0 | 4.1 | arizona + * 1 | 2 | bob | 0 | 3.4 | etc + * 2 | null | craig | 0 | 5.6 | colorado + * 3 | 4 | dave | 0 | 3.7 | connecticut + * 4 | null | erin | 0 | 2.3 | null + * 5 | 6 | frank | 0 | 1.7 | arkansas + * 6 | null | grace | 1 | 3.8 | etc + * 7 | 8 | heidi | 2 | 4.9 | etc + * 8 | null | ivan | 3 | 3.2 | colorado + * 9 | 10 | judy | 4 | 3.1 | california + * + * @param ctx The context. + */ +int32_t write_array(tiledb_ctx_t* ctx) { + // Create data buffers that store the values to be written in. + int dim_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + uint64_t dim_size = sizeof(dim_data); + int32_t a_data[] = {0, 2, 0, 4, 0, 6, 0, 8, 0, 10}; + uint64_t a_size = sizeof(a_data); + uint8_t a_data_validity[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + uint64_t a_validity_size = sizeof(a_data_validity); + char* b_data = "alicebobcraigdaveerinfrankgraceheidiivanjudy"; + uint64_t b_size = strlen(b_data); + uint64_t b_data_offsets[] = {0, 5, 8, 13, 17, 21, 26, 31, 36, 40}; + uint64_t b_offsets_size = sizeof(b_data_offsets); + int32_t c_data[] = {0, 0, 0, 0, 0, 0, 1, 2, 3, 4}; + uint64_t c_size = sizeof(c_data); + float d_data[] = {4.1, 3.4, 5.6, 3.7, 2.3, 1.7, 3.8, 4.9, 3.2, 3.1}; + uint64_t d_size = sizeof(d_data); + uint8_t e_data[] = {2, 7, 5, 6, 100, 3, 7, 7, 5, 4}; + uint64_t e_size = sizeof(e_data); + uint8_t e_validity[] = {1, 1, 1, 1, 0, 1, 1, 1, 1, 1}; + uint64_t e_validity_size = sizeof(e_validity); + + tiledb_array_t* array_w; + TRY(ctx, tiledb_array_alloc(ctx, array_name, &array_w)); + TRY(ctx, tiledb_array_open(ctx, array_w, TILEDB_WRITE)); + + // Execute the write query. 
+ tiledb_query_t* query_w; + TRY(ctx, tiledb_query_alloc(ctx, array_w, TILEDB_WRITE, &query_w)); + TRY(ctx, tiledb_query_set_layout(ctx, query_w, TILEDB_UNORDERED)); + TRY(ctx, + tiledb_query_set_data_buffer(ctx, query_w, "index", dim_data, &dim_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query_w, "a", a_data, &a_size)); + TRY(ctx, + tiledb_query_set_validity_buffer( + ctx, query_w, "a", a_data_validity, &a_validity_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query_w, "b", b_data, &b_size)); + TRY(ctx, + tiledb_query_set_offsets_buffer( + ctx, query_w, "b", b_data_offsets, &b_offsets_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query_w, "c", c_data, &c_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query_w, "d", d_data, &d_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query_w, "e", e_data, &e_size)); + TRY(ctx, + tiledb_query_set_validity_buffer( + ctx, query_w, "e", e_validity, &e_validity_size)); + TRY(ctx, tiledb_query_submit(ctx, query_w)); + TRY(ctx, tiledb_query_finalize(ctx, query_w)); + TRY(ctx, tiledb_array_close(ctx, array_w)); + + tiledb_query_free(&query_w); + tiledb_array_free(&array_w); + + return TILEDB_OK; +} + +/** + * @brief Executes the read query for the array created in write_array. + * + * @param ctx The context. + * @param num_predicates The number of predicate strings (`const char*`) passed as variadic arguments. + */ +int32_t read_array_with_predicates(tiledb_ctx_t* ctx, int num_predicates, ...) { + // Create data buffers to read the values into. + int a_data[10]; + uint64_t a_size = sizeof(a_data); + uint8_t a_data_validity[10]; + uint64_t a_validity_size = sizeof(a_data_validity); + + // We give b_data 256 bytes, which is more than enough to hold + // the combined size of all strings in attribute b. 
+ char b_data[256]; + memset(b_data, 0, 256); + uint64_t b_size = sizeof(b_data); + uint64_t b_data_offsets[10]; + uint64_t b_offsets_size = sizeof(b_data_offsets); + + int32_t c_data[10]; + uint64_t c_size = sizeof(c_data); + float d_data[10]; + uint64_t d_size = sizeof(d_data); + + uint8_t e_data[10]; + uint64_t e_size = sizeof(e_data); + uint8_t e_validity[10]; + uint64_t e_validity_size = sizeof(e_validity); + + tiledb_array_t* array; + TRY(ctx, tiledb_array_alloc(ctx, array_name, &array)); + TRY(ctx, tiledb_array_open(ctx, array, TILEDB_READ)); + + // Execute the read query. + tiledb_query_t* query; + TRY(ctx, tiledb_query_alloc(ctx, array, TILEDB_READ, &query)); + TRY(ctx, tiledb_query_set_layout(ctx, query, TILEDB_GLOBAL_ORDER)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query, "a", a_data, &a_size)); + TRY(ctx, + tiledb_query_set_validity_buffer( + ctx, query, "a", a_data_validity, &a_validity_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query, "b", b_data, &b_size)); + TRY(ctx, + tiledb_query_set_offsets_buffer( + ctx, query, "b", b_data_offsets, &b_offsets_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query, "c", c_data, &c_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query, "d", d_data, &d_size)); + TRY(ctx, tiledb_query_set_data_buffer(ctx, query, "e", e_data, &e_size)); + TRY(ctx, + tiledb_query_set_validity_buffer( + ctx, query, "e", e_validity, &e_validity_size)); + + va_list predicates; + va_start(predicates, num_predicates); + for (int i = 0; i < num_predicates; i++) { + const char* predicate = va_arg(predicates, const char*); + TRY(ctx, tiledb_query_add_predicate(ctx, query, predicate)); + } + va_end(predicates); + + TRY(ctx, tiledb_query_submit(ctx, query)); + + // Collect the results of the read query. The number of elements + // the filtered array contains is calculated by determining the + // number of valid elements in c_data, since the array is + // sparse. 
The length of the filtered substring of all the + // data is in b_size, and all the offsets for filtered + // individual elements are in b_data_offsets. + + // Here we print all the elements that are returned by the query. + uint64_t result_num = c_size / sizeof(int); + for (uint64_t i = 0; i < result_num; ++i) { + uint64_t element_start = b_data_offsets[i]; + uint64_t element_length = (i == result_num - 1) ? + (b_size / sizeof(char)) - element_start : + b_data_offsets[i + 1] - element_start; + print_elem( + a_data_validity[i] ? &a_data[i] : NULL, + b_data + element_start, + element_length, + c_data[i], + d_data[i], + e_validity[i] ? &e_data[i] : NULL); + } + + TRY(ctx, tiledb_query_finalize(ctx, query)); + TRY(ctx, tiledb_array_close(ctx, array)); + + tiledb_query_free(&query); + tiledb_array_free(&array); + + return TILEDB_OK; +} + +int32_t read_array_with_predicate(tiledb_ctx_t* ctx, const char* predicate) { + return read_array_with_predicates(ctx, 1, predicate); +} + +int main() { + // Create the context. NOTE(review): `vfs` below is allocated but never released in main; confirm whether `tiledb_vfs_free` should be called. + tiledb_ctx_t* ctx; + tiledb_ctx_alloc(NULL, &ctx); + + tiledb_vfs_t* vfs; + tiledb_vfs_alloc(ctx, NULL, &vfs); + + int32_t is_dir = 0; + tiledb_vfs_is_dir(ctx, vfs, array_name, &is_dir); + if (!is_dir) { + // Create and write data to the array. + RETURN_IF_NOT_OK(create_array(ctx)); + RETURN_IF_NOT_OK(write_array(ctx)); + } + + // EXAMPLES FROM query_condition_sparse.c EXAMPLE + + // Execute a read query with no predicate which prints the entire array. + printf("NO PREDICATE\n"); + RETURN_IF_NOT_OK(read_array_with_predicates(ctx, 0)); + printf("\n"); + + // Execute a read query with predicate `TRUE`, which filters no cells and + // prints the whole array + printf("WHERE TRUE\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "TRUE")); + printf("\n"); + + // Execute a read query with predicate `a IS NULL`. 
+ printf("WHERE a IS NULL\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "a IS NULL")); + printf("\n"); + + // Execute a read query with predicate `b < "eve"`. + printf("WHERE b < 'eve'\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "b < 'eve'")); + printf("\n"); + + // Execute a read query with predicate `c >= 1`. + printf("WHERE c >= 1\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "c >= 1")); + printf("\n"); + + // Execute a read query with predicate `3.0f <= d AND d <= 4.0f`. + printf("WHERE d BETWEEN 3.0 AND 4.0\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "d BETWEEN 3.0 AND 4.0")); + printf("\n"); + + // Execute a read query with predicate `3.0f <= d AND d <= 4.0f AND a != null + // AND b < \"eve\"`. + printf("WHERE (d BETWEEN 3.0 AND 4.0) AND a IS NOT NULL AND b < 'eve'\n"); + RETURN_IF_NOT_OK(read_array_with_predicates( + ctx, 3, "d BETWEEN 3.0 AND 4.0", "a IS NOT NULL", "b < 'eve'")); + printf("\n"); + + // BEGIN EXAMPLES WITH ENUMERATIONS + printf("WHERE e = 'california'\n"); + { + // error is expected as enumerations are not supported yet + const int32_t ret = read_array_with_predicate(ctx, "e = 'california'"); + if (ret != TILEDB_ERR) { + return TILEDB_ERR; + } + } + printf("\n"); + + // BEGIN EXAMPLES WITH NO EQUIVALENT + + // query condition does not have functions, here we use coalesce + printf("WHERE coalesce(a, 2) + c < index\n"); + RETURN_IF_NOT_OK( + read_array_with_predicate(ctx, "coalesce(a, 2) + c < index")); + printf("\n"); + + // FIXME: this is query-condition-able, use arithmetic + printf("WHERE a > 6 OR a IS NULL\n"); + RETURN_IF_NOT_OK(read_array_with_predicate(ctx, "a > 6 OR a IS NULL")); + printf("\n"); + + tiledb_ctx_free(&ctx); + + return 0; +} diff --git a/examples/cpp_api/query_add_predicate.cc b/examples/cpp_api/query_add_predicate.cc new file mode 100644 index 00000000000..c62f18fd6b1 --- /dev/null +++ b/examples/cpp_api/query_add_predicate.cc @@ -0,0 +1,364 @@ +/** + * @file 
query_add_predicate.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This example demonstrates using the `QueryExperimental::add_predicate` + * API to add one or more text predicates to a query. This API parses a SQL + * predicate and uses it to filter results inside of the storage engine + * before returning them to the user. + * + * The array used in this example is identical to that of the + * `query_condition_sparse` example. The first group of predicates which + * run are text equivalents of the predicates in that example, and produce + * the same results. + * + * This example also has additional queries which use predicates which + * combine dimensions and attributes, highlighting a capability which + * cannot be replicated by just subarrays and query conditions. 
+ */ + +#include +#include +#include +#include +#include + +using namespace tiledb; + +// Name of array. +std::string array_name("array_query_add_predicate"); + +// Enumeration variants +const std::vector us_states = { + "alabama", + "alaska", + "arizona", + "arkansas", + "california", + "colorado", + "connecticut", + "etc"}; + +/** + * @brief Function to print the values of all the attributes for one + * index of this array. + * + * @param a Attribute a's value. + * @param b Attribute b's value. + * @param c Attribute c's value. + * @param d Attribute d's value. + */ +void print_elem( + std::optional a, + std::string b, + int32_t c, + float d, + std::optional e) { + std::cout << "{" << (a.has_value() ? std::to_string(a.value()) : "null") + << ", " << b << ", " << c << ", " << d << ", " + << (e.has_value() ? + (e.value() < us_states.size() ? + us_states[e.value()] : + "(invalid key " + std::to_string(e.value()) + ")") : + "null") + << "}" << std::endl; +} + +/** + * @brief Function to create the TileDB array used in this example. + * The array will be 1D with size 1 with dimension "index". + * The bounds on the index will be 0 through 9, inclusive. + * + * The array has five attributes. The five attributes are + * - "a" (type int, nullable) + * - "b" (type std::string) + * - "c" (type int32_t) + * - "d" (type float) + * - "e" (nullable, enumeration "us_states") + * @param ctx The context. + */ +void create_array(Context& ctx) { + // Creating the domain and the dimensions. + Domain domain(ctx); + domain.add_dimension(Dimension::create(ctx, "index", {{0, 9}})); + + // The array will be sparse. + ArraySchema schema(ctx, TILEDB_SPARSE); + schema.set_domain(domain).set_order({{TILEDB_ROW_MAJOR}}); + + // Adding the attributes of the array to the array schema. 
+ Attribute a = Attribute::create(ctx, "a").set_nullable(true); + schema.add_attribute(a) + .add_attribute(Attribute::create(ctx, "b")) + .add_attribute(Attribute::create(ctx, "c")) + .add_attribute(Attribute::create(ctx, "d")); + + // Create enumeration and an attribute using it + ArraySchemaExperimental::add_enumeration( + ctx, + schema, + Enumeration::create(ctx, std::string("us_states"), us_states)); + + { + auto e = Attribute::create(ctx, "e").set_nullable(true); + AttributeExperimental::set_enumeration_name(ctx, e, "us_states"); + schema.add_attribute(e); + } + + // Create the (empty) array. + Array::create(ctx, array_name, schema); +} + +/** + * @brief Execute a write on the array_query_add_predicate array + * which then stores the following data in the array. The table + * is organized by dimension/attribute. + * + * index | a | b | c | d | e + * --------------------------------------------- + * 0 | null | alice | 0 | 4.1 | arizona + * 1 | 2 | bob | 0 | 3.4 | etc + * 2 | null | craig | 0 | 5.6 | colorado + * 3 | 4 | dave | 0 | 3.7 | connecticut + * 4 | null | erin | 0 | 2.3 | null + * 5 | 6 | frank | 0 | 1.7 | arkansas + * 6 | null | grace | 1 | 3.8 | etc + * 7 | 8 | heidi | 2 | 4.9 | etc + * 8 | null | ivan | 3 | 3.2 | colorado + * 9 | 10 | judy | 4 | 3.1 | california + * + * @param ctx The context. + */ +void write_array(Context& ctx) { + // Create data buffers that store the values to be written in. 
+ std::vector dim_data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + std::vector a_data = {0, 2, 0, 4, 0, 6, 0, 8, 0, 10}; + std::vector a_data_validity = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + std::vector b_strs = { + "alice", + "bob", + "craig", + "dave", + "erin", + "frank", + "grace", + "heidi", + "ivan", + "judy"}; + std::string b_data = ""; + std::vector b_data_offsets; + for (const auto& elem : b_strs) { + b_data_offsets.push_back(b_data.size()); + b_data += elem; + } + std::vector c_data = {0, 0, 0, 0, 0, 0, 1, 2, 3, 4}; + std::vector d_data = { + 4.1, 3.4, 5.6, 3.7, 2.3, 1.7, 3.8, 4.9, 3.2, 3.1}; + + std::vector e_keys = {2, 7, 5, 6, 100, 3, 7, 7, 5, 4}; + std::vector e_validity = {1, 1, 1, 1, 0, 1, 1, 1, 1, 1}; + + // Execute the write query. + Array array_w(ctx, array_name, TILEDB_WRITE); + Query query_w(ctx, array_w); + query_w.set_layout(TILEDB_UNORDERED) + .set_data_buffer("index", dim_data) + .set_data_buffer("a", a_data) + .set_validity_buffer("a", a_data_validity) + .set_data_buffer("b", b_data) + .set_offsets_buffer("b", b_data_offsets) + .set_data_buffer("c", c_data) + .set_data_buffer("d", d_data) + .set_data_buffer("e", e_keys) + .set_validity_buffer("e", e_validity); + + query_w.submit(); + query_w.finalize(); + array_w.close(); +} + +/** + * @brief Executes the read query for the array created in write_array. + * + * @param ctx The context. + * @param predicates The text predicates to add to the query. + */ +void read_array_with_predicates( + Context& ctx, std::vector predicates) { + const unsigned reserve_cells = 16; + + // Create data buffers to read the values into. 
+ std::vector a_data(reserve_cells); + std::vector a_data_validity(reserve_cells); + + // We initialize the string b_data to have enough space to + // contain the total length of all of the strings written + // into attribute b + std::string b_data; + b_data.resize(256); + + std::vector b_data_offsets(reserve_cells); + std::vector c_data(reserve_cells); + std::vector d_data(reserve_cells); + std::vector e_keys(reserve_cells); + std::vector e_validity(reserve_cells); + + // reserve additional space so we can push a trailing offset + // to make the printing logic more straightforward + // (this should not be necessary but without this the `push_back` + // flags -Werror=array-bounds in some compilers) + b_data_offsets.reserve(reserve_cells + 1); + + // Execute the read query. + Array array(ctx, array_name, TILEDB_READ); + Query query(ctx, array); + query.set_layout(TILEDB_GLOBAL_ORDER) + .set_data_buffer("a", a_data) + .set_validity_buffer("a", a_data_validity) + .set_data_buffer("b", b_data) + .set_offsets_buffer("b", b_data_offsets) + .set_data_buffer("c", c_data) + .set_data_buffer("d", d_data) + .set_data_buffer("e", e_keys) + .set_validity_buffer("e", e_validity); + + for (const auto& predicate : predicates) { + QueryExperimental::add_predicate(ctx, query, predicate); + } + + query.submit(); + + // Collect the results of the read query. The number of elements + // the filtered array contains is in num_elements_result. + // The length of the filtered substring of all the data is in + // b_data, and all the offsets for filtered individual elements + // are in b_data_offsets. + auto table = query.result_buffer_elements_nullable(); + size_t num_elements_result = std::get<1>(table["c"]); + uint64_t b_str_length = std::get<1>(table["b"]); + if (num_elements_result < b_data_offsets.size()) { + b_data_offsets[num_elements_result] = b_str_length; + } else { + b_data_offsets.push_back(b_str_length); + } + + // Here we print all the elements that are returned by the query. 
+ for (size_t i = 0; i < num_elements_result; ++i) { + // We pass in nullopt if the data is invalid, per the validity buffer. + print_elem( + (a_data_validity[i] ? std::optional{a_data[i]} : std::nullopt), + b_data.substr( + b_data_offsets[i], b_data_offsets[i + 1] - b_data_offsets[i]), + c_data[i], + d_data[i], + (e_validity[i] ? std::optional{e_keys[i]} : std::nullopt)); + } + + query.finalize(); + array.close(); +} + +int main() { + // Create the context. + Context ctx; + VFS vfs(ctx); + if (!vfs.is_dir(array_name)) { + // Create and write data to the array. + create_array(ctx); + write_array(ctx); + } + + // EXAMPLES FROM query_condition_sparse.cc EXAMPLE + + // Execute a read query with no predicate which prints the entire array. + std::cout << "NO PREDICATE" << std::endl; + read_array_with_predicates(ctx, {}); + std::cout << std::endl; + + // Execute a read query with predicate `TRUE`, which filters no cells and + // prints the whole array + std::cout << "WHERE TRUE" << std::endl; + read_array_with_predicates(ctx, {"TRUE"}); + std::cout << std::endl; + + // Execute a read query with predicate `a = null`. + std::cout << "WHERE a IS NULL" << std::endl; + read_array_with_predicates(ctx, {"a IS NULL"}); + std::cout << std::endl; + + // Execute a read query with predicate `b < "eve"`. + std::cout << "WHERE b < 'eve'" << std::endl; + read_array_with_predicates(ctx, {"b < 'eve'"}); + std::cout << std::endl; + + // Execute a read query with predicate `c >= 1`. + std::cout << "WHERE c >= 1" << std::endl; + read_array_with_predicates(ctx, {"c >= 1"}); + std::cout << std::endl; + + // Execute a read query with predicate `3.0f <= d AND d <= 4.0f`. + std::cout << "WHERE d BETWEEN 3.0 AND 4.0" << std::endl; + QueryCondition qc3(ctx); + read_array_with_predicates(ctx, {"d BETWEEN 3.0 AND 4.0"}); + std::cout << std::endl; + + // Execute a read query with predicate `3.0f <= d AND d <= 4.0f AND a != null + // AND b < \"eve\"`. 
+ std::cout << "WHERE d BETWEEN 3.0 AND 4.0 AND a IS NOT NULL AND b < 'eve'" + << std::endl; + read_array_with_predicates( + ctx, {"d BETWEEN 3.0 AND 4.0", "a IS NOT NULL", "b < 'eve'"}); + std::cout << std::endl; + + // BEGIN EXAMPLES WITH ENUMERATIONS + // error is expected as enumerations are not supported yet + std::cout << "WHERE e = 'california'" << std::endl; + try { + read_array_with_predicates(ctx, {"e = 'california'"}); + // should not get here + return TILEDB_ERR; + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + } + std::cout << std::endl; + + // BEGIN EXAMPLES WITH NO EQUIVALENT + // these examples cannot be expressed using subarray + query condition + + // query condition does not have functions, here we use coalesce + std::cout << "WHERE coalesce(a, 2) + c < index" << std::endl; + read_array_with_predicates(ctx, {"coalesce(a, 2) + c < index"}); + std::cout << std::endl; + + // FIXME: this is query-condition-able, use arithmetic + std::cout << "WHERE a > 6 OR a IS NULL" << std::endl; + read_array_with_predicates(ctx, {"a > 6 OR a IS NULL"}); + std::cout << std::endl; + + return 0; +} diff --git a/scripts/linter.py b/scripts/linter.py index 8c695599e04..659e7661a3c 100755 --- a/scripts/linter.py +++ b/scripts/linter.py @@ -141,6 +141,12 @@ def accept_path(self, file_name: str) -> bool: path_components = file_name.split(os.sep) if 'test' in path_components or 'test-support' in path_components: return False + + # the Rust/C++ inter-op using Rust's `cxx` crate can only pass values from + # C++ to Rust using std::unique_ptr + if 'oxidize' in path_components: + return False + return path_components[-1] not in heap_memory_ignored_files diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f7753d991d0..aa5286c0981 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -114,6 +114,7 @@ set(TILEDB_UNIT_TEST_SOURCES src/unit-ordered-dim-label-reader.cc src/unit-tile-metadata.cc src/unit-tile-metadata-generator.cc + 
src/unit-query-add-predicate.cc src/unit-query-plan.cc src/unit-ReadCellSlabIter.cc src/unit-Reader.cc diff --git a/test/src/unit-Reader.cc b/test/src/unit-Reader.cc index e942baa1189..b0e8f93c8af 100644 --- a/test/src/unit-Reader.cc +++ b/test/src/unit-Reader.cc @@ -163,7 +163,7 @@ TEST_CASE_METHOD( buffers.emplace( "a", tiledb::sm::QueryBuffer(nullptr, nullptr, &tmp_size, &tmp_size)); std::unordered_map aggregate_buffers; - std::optional condition; + QueryPredicates predicates; ThreadPool tp_cpu(4), tp_io(4); Array array(context.resources(), URI(array_name_)); CHECK(array.open(QueryType::READ, EncryptionType::NO_ENCRYPTION, nullptr, 0) @@ -183,7 +183,7 @@ TEST_CASE_METHOD( aggregate_buffers, subarray, Layout::ROW_MAJOR, - condition, + predicates, default_channel_aggregates, false); Reader reader(&g_helper_stats, g_helper_logger(), params); diff --git a/test/src/unit-capi-config.cc b/test/src/unit-capi-config.cc index 17167df355d..6e6c6b12163 100644 --- a/test/src/unit-capi-config.cc +++ b/test/src/unit-capi-config.cc @@ -291,8 +291,6 @@ void check_save_to_file() { ss << "sm.memory_budget_var 10737418240\n"; ss << "sm.merge_overlapping_ranges_experimental true\n"; ss << "sm.partial_tile_offsets_loading false\n"; - ss << "sm.query.condition_evaluator " << Config::SM_QUERY_CONDITION_EVALUATOR - << "\n"; ss << "sm.query.dense.qc_coords_mode false\n"; ss << "sm.query.dense.reader refactored\n"; ss << "sm.query.sparse_global_order.preprocess_tile_merge " @@ -648,8 +646,6 @@ TEST_CASE("C API: Test config iter", "[capi][config]") { all_param_values["sm.query.sparse_global_order.preprocess_tile_merge"] = Config::SM_QUERY_SPARSE_GLOBAL_ORDER_PREPROCESS_TILE_MERGE; all_param_values["sm.query.sparse_global_order.reader"] = "refactored"; - all_param_values["sm.query.condition_evaluator"] = - Config::SM_QUERY_CONDITION_EVALUATOR; all_param_values["sm.query.sparse_unordered_with_dups.reader"] = "refactored"; all_param_values["sm.mem.consolidation.buffers_weight"] = "1"; 
all_param_values["sm.mem.consolidation.reader_weight"] = "3"; diff --git a/test/src/unit-query-add-predicate.cc b/test/src/unit-query-add-predicate.cc new file mode 100644 index 00000000000..3754c91381e --- /dev/null +++ b/test/src/unit-query-add-predicate.cc @@ -0,0 +1,1129 @@ +/** + * @file unit-query-add-predicate.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * Tests for the `tiledb_query_add_predicate` API. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test/support/src/array_templates.h" +#include "test/support/src/error_helpers.h" +#include "test/support/src/helpers.h" +#include "test/support/src/vfs_helpers.h" +#include "tiledb/api/c_api/array/array_api_internal.h" +#include "tiledb/sm/cpp_api/tiledb" +#include "tiledb/sm/cpp_api/tiledb_experimental" + +// this API only works if rust is enabled +#ifdef HAVE_RUST +static constexpr bool isAddPredicateEnabled = true; +#else +static constexpr bool isAddPredicateEnabled = false; +#endif + +using namespace tiledb; +using namespace tiledb::test; + +// no rapidcheck +using Asserter = AsserterCatch; + +// query result type for the array schema used in these tests +using Cells = templates::Fragment2D< + uint64_t, + uint64_t, + std::optional, + std::vector, + std::optional>; + +/** + * Optional arguments for `QueryAddPredicateFx::query_array`: + * a query-level config and an optional query condition + */ +struct QueryArrayKWArgs { + Config config; + std::optional condition; +}; + +struct QueryAddPredicateFx { + VFSTestSetup vfs_test_setup_; + Context ctx_; + + static const Cells INPUT; + + QueryAddPredicateFx() + : ctx_(vfs_test_setup_.ctx()) { + } + + /** + * Creates (without writing any cells) a two-dimensional array + * with dimensions 'row' and 'col' and attributes: + * - 'a INT32' + * - 'v VARCHAR NOT NULL' + * - 'e UINT8:VARCHAR' + */ + void create_array( + const std::string& path, + tiledb_array_type_t atype, + bool allow_dups = false); + + /** + * Writes cells to a sparse array using the data in `input` + */ + template + void write_array(const std::string& path, const F& input = INPUT); + + /** + * Writes `INPUT` to saturate the ranges [[1, 4], [1, 4]] for an array + * of the schema given above + */ + void write_array_dense(const std::string& path); + + /** + * Runs a read query over the array at `path` using `layout`, + * adding each of `predicates` 
via `QueryExperimental::add_predicate` + * and the query condition from `kwargs` if one is set. + * Requires the query to complete in a single submit. + * + * @return the cells produced by the query + */ + template + F query_array( + const std::string& path, + tiledb_layout_t layout, + const std::vector& predicates, + const QueryArrayKWArgs& kwargs = QueryArrayKWArgs()); +}; + +template +static F make_cells_generic( + std::vector d1, + std::vector d2, + std::vector... 
atts) { + F value; + value.d1() = templates::query_buffers(d1); + value.d2() = templates::query_buffers(d2); + value.atts_ = std::apply( + [](std::vector... att) { + return std::make_tuple...>( + templates::query_buffers(att)...); + }, + std::make_tuple(atts...)); + return value; +} + +static Cells make_cells( + std::vector d1, + std::vector d2, + std::vector> a, + std::vector v, + std::vector> e) { + return make_cells_generic(d1, d2, a, v, e); +} + +const Cells QueryAddPredicateFx::INPUT = make_cells( + {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4}, + {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4}, + {15, + std::nullopt, + std::nullopt, + 12, + std::nullopt, + 10, + 9, + std::nullopt, + 7, + 6, + 5, + 4, + std::nullopt, + 2, + 1, + 0}, + {"one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "ten", + "eleven", + "twelve", + "thirteen", + "fourteen", + "fifteen", + "sixteen"}, + {4, + 4, + 7, + std::nullopt, + 7, + 7, + std::nullopt, + 0, + 1, + std::nullopt, + 3, + 4, + std::nullopt, + 6, + 7, + std::nullopt}); + +const Cells expect_a_is_null = make_cells( + {1, 1, 2, 2, 4}, + {2, 3, 1, 4, 1}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {"two", "three", "five", "eight", "thirteen"}, + {4, 7, 7, 0, std::nullopt}); + +const Cells expect_v_starts_with_t = make_cells( + {1, 1, 3, 3, 4}, + {2, 3, 2, 4, 1}, + {std::nullopt, std::nullopt, 6, 4, std::nullopt}, + {"two", "three", "ten", "twelve", "thirteen"}, + {4, 7, std::nullopt, 4, std::nullopt}); + +const Cells expect_e_is_null = make_cells( + {1, 2, 3, 4, 4}, + {4, 3, 2, 1, 4}, + {12, 9, 6, std::nullopt, 0}, + {"four", "seven", "ten", "thirteen", "sixteen"}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}); + +const Cells expect_a_is_null_and_row_gt_col = make_cells( + {2, 4}, + {1, 1}, + {std::nullopt, std::nullopt}, + {"five", "thirteen"}, + {7, std::nullopt}); + +const Cells expect_a_is_null_and_v_starts_with_t = 
make_cells( + {1, 1, 4}, + {2, 3, 1}, + {std::nullopt, std::nullopt, std::nullopt}, + {"two", "three", "thirteen"}, + {4, 7, std::nullopt}); + +[[maybe_unused]] const Cells expect_a_and_e_are_null = + make_cells({4}, {1}, {std::nullopt}, {"thirteen"}, {std::nullopt}); + +auto matchEnumerationNotSupported(std::string enumeration_name = "e") { + return Catch::Matchers::ContainsSubstring( + "Error evaluating expression: Data error: Cannot process field '" + + enumeration_name + + "': Attributes with enumerations are not supported in text predicates"); +} + +void QueryAddPredicateFx::create_array( + const std::string& path, tiledb_array_type_t atype, bool allow_dups) { + Domain domain(ctx_); + domain.add_dimension(Dimension::create(ctx_, "row", {{1, 4}}, 4)); + domain.add_dimension(Dimension::create(ctx_, "col", {{1, 4}}, 4)); + + ArraySchema schema(ctx_, atype); + schema.set_tile_order(TILEDB_ROW_MAJOR); + schema.set_cell_order(TILEDB_ROW_MAJOR); + schema.set_domain(domain); + schema.set_allows_dups(allow_dups); + + schema.add_attribute( + Attribute::create(ctx_, "a").set_nullable(true)); + schema.add_attribute(Attribute::create(ctx_, "v")); + + // enumerated attribute + std::vector us_states = { + "alabama", + "alaska", + "arizona", + "arkansas", + "california", + "colorado", + "connecticut", + "etc"}; + ArraySchemaExperimental::add_enumeration( + ctx_, + schema, + Enumeration::create(ctx_, std::string("us_states"), us_states)); + { + auto e = Attribute::create(ctx_, "e").set_nullable(true); + AttributeExperimental::set_enumeration_name(ctx_, e, "us_states"); + schema.add_attribute(e); + } + + Array::create(path, schema); +} + +template +void QueryAddPredicateFx::write_array(const std::string& path, const F& input) { + Array array(ctx_, path, TILEDB_WRITE); + Query query(ctx_, array); + + auto field_sizes = + templates::query::make_field_sizes(const_cast(input)); + templates::query::set_fields( + ctx_.ptr().get(), + query.ptr().get(), + field_sizes, + 
const_cast(input), + array.ptr().get()->array_schema_latest()); + query.submit(); +} + +void QueryAddPredicateFx::write_array_dense(const std::string& path) { + Array array(ctx_, path, TILEDB_WRITE); + Query query(ctx_, array); + + Subarray s(ctx_, array); + s.add_range(0, 1, 4); + s.add_range(1, 1, 4); + query.set_layout(TILEDB_ROW_MAJOR).set_subarray(s); + + using DenseFragment = templates::DenseFragment< + std::optional, + std::vector, + std::optional>; + + DenseFragment cells; + cells.atts_ = INPUT.atts_; + + auto field_sizes = templates::query::make_field_sizes(cells); + templates::query::set_fields( + ctx_.ptr().get(), + query.ptr().get(), + field_sizes, + cells, + array.ptr().get()->array_schema_latest()); + + query.submit(); +} + +template +F QueryAddPredicateFx::query_array( + const std::string& path, + tiledb_layout_t layout, + const std::vector& predicates, + const QueryArrayKWArgs& kwargs) { + Array array(ctx_, path, TILEDB_READ); + Query query(ctx_, array); + + query.set_config(kwargs.config).set_layout(layout); + + F out; + out.resize(32); + + auto field_sizes = + templates::query::make_field_sizes(out, out.size()); + + templates::query::set_fields( + ctx_.ptr().get(), + query.ptr().get(), + field_sizes, + out, + array.ptr().get()->array_schema_latest()); + + for (const std::string& pred : predicates) { + QueryExperimental::add_predicate(ctx_, query, pred); + } + + if (kwargs.condition.has_value()) { + query.set_condition(kwargs.condition.value()); + } + + if (array.schema().array_type() == TILEDB_DENSE) { + Subarray s(ctx_, array); + s.add_range(0, 1, 4); + s.add_range(1, 1, 4); + query.set_subarray(s); + } + + const auto st = query.submit(); + REQUIRE(st == Query::Status::COMPLETE); + + templates::query::resize(out, field_sizes); + + return out; +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate TILEDB_RUST=OFF", + "[capi][query][add_predicate]") { + if (isAddPredicateEnabled) { + SKIP("Test for build configuration TILEDB_RUST=OFF 
only"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_TILEDB_RUST_OFF"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + const auto match = Catch::Matchers::ContainsSubstring( + "Cannot add query predicate: feature requires build " + "configuration '-DTILEDB_RUST=ON'"); + REQUIRE_THROWS_WITH( + query_array(array_name, TILEDB_GLOBAL_ORDER, {"a IS NULL", "row > col"}), + match); +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate errors", + "[capi][query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_errors"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + SECTION("Non-read query errors") { + Array array(ctx_, array_name, TILEDB_WRITE); + Query query(ctx_, array); + + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, {"row BETWEEN 4 AND 7"}), + Catch::Matchers::ContainsSubstring( + "Cannot add query predicate; Operation only applicable to read " + "queries")); + } + + SECTION("Read query errors") { + Array array(ctx_, array_name, TILEDB_READ); + Query query(ctx_, array); + + SECTION("Null") { + const auto maybe_err = error_if_any( + ctx_.ptr().get(), + tiledb_query_add_predicate( + ctx_.ptr().get(), query.ptr().get(), nullptr)); + REQUIRE(maybe_err.has_value()); + REQUIRE_THAT( + maybe_err.value(), + Catch::Matchers::ContainsSubstring( + "Argument \"predicate\" may not be NULL")); + } + + SECTION("Syntax error") { + const std::string expect = + "Parse error: SQL error: ParserError(\"Expected: end of expression, " + "found: col at Line: 1, Column: 5\")"; + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, {"row col"}), + Catch::Matchers::ContainsSubstring(expect)); + } + + SECTION("Non-expression") { + REQUIRE_THROWS_WITH( + 
QueryExperimental::add_predicate( + ctx_, query, {"CREATE TABLE foo (id INT)"}), + Catch::Matchers::ContainsSubstring( + "Error adding predicate: Parse error: SQL error: " + "ParserError(\"Unsupported command in expression\")")); + } + + SECTION("Not a predicate") { + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, {"row"}), + Catch::Matchers::ContainsSubstring( + "Expression is not a predicate: found return type UInt64")); + } + + SECTION("Schema error") { + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, {"depth = 3"}), + Catch::Matchers::ContainsSubstring( + "Error adding predicate: Parse error: Schema error: No field " + "named depth. Valid fields are row, col, a, v, e.")); + } + + SECTION("Type coercion failure") { + // FIXME: from the tables CLI this gives a very different error which is + // more user-friendly, there must be some optimization pass which we are + // not doing + const std::string dferror = + "Error adding predicate: Type coercion error: Internal error: Expect " + "TypeSignatureClass::Native(LogicalType(Native(String), String)) but " + "received NativeType::UInt64, DataType: UInt64.\nThis issue was " + "likely caused by a bug in DataFusion's code. 
Please help us to " + "resolve this by filing a bug report in our issue tracker:"; + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate( + ctx_, query, {"starts_with(row, '1')"}), + Catch::Matchers::ContainsSubstring(dferror)); + } + + SECTION("Aggregate") { + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, {"sum(row) >= 10"}), + Catch::Matchers::ContainsSubstring( + "Expression contains aggregate functions which are not supported " + "in predicates")); + } + } +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate to in progress query", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_in_progress"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + Array array(ctx_, array_name, TILEDB_READ); + Query query(ctx_, array); + + query.set_layout(TILEDB_GLOBAL_ORDER); + + Cells out; + out.resize(INPUT.size() - 1); + + auto field_sizes = + templates::query::make_field_sizes(out, out.size()); + + templates::query::set_fields( + ctx_.ptr().get(), + query.ptr().get(), + field_sizes, + out, + array.ptr().get()->array_schema_latest()); + + const auto st = query.submit(); + REQUIRE(st == Query::Status::INCOMPLETE); + + const auto expect_err = Catch::Matchers::ContainsSubstring( + "Cannot add query predicate; Adding a predicate to an already " + "initialized query is not supported."); + REQUIRE_THROWS_WITH( + QueryExperimental::add_predicate(ctx_, query, "row = col"), expect_err); +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate dense array", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_dense"); + + create_array(array_name, TILEDB_DENSE); + 
write_array_dense(array_name); + + // FIXME: error messages + REQUIRE_THROWS(query_array(array_name, TILEDB_UNORDERED, {"row >= 3"})); + REQUIRE_THROWS(query_array(array_name, TILEDB_ROW_MAJOR, {"row >= 3"})); + REQUIRE_THROWS(query_array(array_name, TILEDB_COL_MAJOR, {"row >= 3"})); + REQUIRE_THROWS(query_array(array_name, TILEDB_GLOBAL_ORDER, {"row >= 3"})); + REQUIRE_THROWS(query_array(array_name, TILEDB_HILBERT, {"row >= 3"})); +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate sparse unsupported query order", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_sparse_unsupported"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + const auto match = Catch::Matchers::ContainsSubstring( + "tiledb_query_add_predicate is not supported for this query"); + + SECTION("Row major") { + REQUIRE_THROWS_WITH( + query_array(array_name, TILEDB_ROW_MAJOR, {"a IS NULL", "row > col"}), + match); + } + + SECTION("Col major") { + REQUIRE_THROWS_WITH( + query_array(array_name, TILEDB_COL_MAJOR, {"a IS NULL", "row > col"}), + match); + } + + SECTION("Legacy global order") { + Config qconf; + qconf["sm.query.sparse_global_order.reader"] = "legacy"; + + QueryArrayKWArgs kwargs; + kwargs.config = qconf; + + REQUIRE_THROWS_WITH( + query_array( + array_name, + TILEDB_GLOBAL_ORDER, + {"a IS NULL", "row > col"}, + kwargs), + match); + } +} + +/** + * Tests applying datafusion predicates to sparse global order reader. + * + * NB: `WHERE TRUE` and `WHERE FALSE` and `WHERE NULL` may look silly + * but they exercise the `ColumnarValue::Scalar` branches of evaluation + * which become increasingly important once constant folding is + * introduced. 
+ */ +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate sparse global order", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_sparse_global_order"); + + const auto query_order = GENERATE(TILEDB_GLOBAL_ORDER, TILEDB_UNORDERED); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + SECTION("WHERE TRUE") { + const auto result = query_array(array_name, query_order, {"TRUE"}); + CHECK(result == INPUT); + } + + SECTION("WHERE FALSE") { + const auto result = query_array(array_name, query_order, {"FALSE"}); + CHECK(result == Cells()); + } + + SECTION("WHERE NULL") { + const auto result = query_array(array_name, query_order, {"NULL"}); + CHECK(result == Cells()); + } + + SECTION("WHERE a IS NOT NULL") { + const Cells expect = make_cells( + {1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4}, + {1, 4, 2, 3, 1, 2, 3, 4, 2, 3, 4}, + {15, 12, 10, 9, 7, 6, 5, 4, 2, 1, 0}, + {"one", + "four", + "six", + "seven", + "nine", + "ten", + "eleven", + "twelve", + "fourteen", + "fifteen", + "sixteen"}, + {4, + std::nullopt, + 7, + std::nullopt, + 1, + std::nullopt, + 3, + 4, + 6, + 7, + std::nullopt}); + + const auto result = query_array(array_name, query_order, {"a IS NOT NULL"}); + CHECK(result == expect); + } + + SECTION("WHERE v < 'fourteen'") { + const Cells expect = make_cells( + {1, 2, 2, 3, 4}, + {4, 1, 4, 3, 3}, + {12, std::nullopt, std::nullopt, 5, 1}, + {"four", "five", "eight", "eleven", "fifteen"}, + {std::nullopt, 7, 0, 3, 7}); + + const auto result = + query_array(array_name, query_order, {"v < 'fourteen'"}); + CHECK(result == expect); + } + + SECTION("WHERE row + col <= 4") { + const Cells expect = make_cells( + {1, 1, 1, 2, 2, 3}, + {1, 2, 3, 1, 2, 1}, + {15, std::nullopt, std::nullopt, std::nullopt, 10, 7}, + {"one", "two", "three", "five", "six", "nine"}, + {4, 4, 7, 7, 7, 1}); + + 
const auto result = + query_array(array_name, query_order, {"row + col <= 4"}); + CHECK(result == expect); + } + + SECTION("WHERE a IS NULL AND row > col") { + const auto result = + query_array(array_name, query_order, {"a IS NULL", "row > col"}); + CHECK(result == expect_a_is_null_and_row_gt_col); + } + + SECTION("WHERE a IS NULL AND TRUE AND row > col") { + const auto result = query_array( + array_name, query_order, {"a IS NULL", "TRUE", "row > col"}); + CHECK(result == expect_a_is_null_and_row_gt_col); + } + + SECTION("WHERE a IS NULL AND row > col AND TRUE") { + const auto result = query_array( + array_name, query_order, {"a IS NULL", "row > col", "TRUE"}); + CHECK(result == expect_a_is_null_and_row_gt_col); + } + + SECTION("WHERE a IS NULL AND FALSE AND row > col") { + const auto result = query_array( + array_name, query_order, {"a IS NULL", "FALSE", "row > col"}); + CHECK(result == Cells()); + } + + SECTION("WHERE a IS NULL AND NULL AND row > col") { + const auto result = query_array( + array_name, query_order, {"a IS NULL", "FALSE", "row > col"}); + CHECK(result == Cells()); + } + + SECTION("WHERE coalesce(a, row) > col") { + const Cells expect = make_cells( + {1, 1, 2, 2, 2, 3, 3, 3, 4}, + {1, 4, 1, 2, 3, 1, 2, 3, 1}, + {15, 12, std::nullopt, 10, 9, 7, 6, 5, std::nullopt}, + {"one", + "four", + "five", + "six", + "seven", + "nine", + "ten", + "eleven", + "thirteen"}, + {4, + std::nullopt, + 7, + 7, + std::nullopt, + 1, + std::nullopt, + 3, + std::nullopt}); + + const auto result = + query_array(array_name, query_order, {"coalesce(a, row) > col"}); + CHECK(result == expect); + } + + SECTION("WHERE e < 'california'") { + // enumeration not supported yet + REQUIRE_THROWS_WITH( + query_array(array_name, query_order, {"e < 'california'"}), + matchEnumerationNotSupported()); + } +} + +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate sparse unordered with dups", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + 
SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = vfs_test_setup_.array_uri( + "test_query_add_predicate_sparse_unordered_with_dups"); + + create_array(array_name, TILEDB_SPARSE, true); + + const auto query_order = TILEDB_UNORDERED; + + const Cells f2 = make_cells( + {1, 1, 2, 2, 3, 3, 4, 4}, + {1, 4, 2, 3, 1, 4, 2, 3}, + {-1, std::nullopt, std::nullopt, -4, std::nullopt, -6, -7, std::nullopt}, + {"ένα", "δύο", "τρία", "τέσσερα", "πέντε", "έξι", "επτά", "οκτώ"}, + {0, 7, 1, std::nullopt, 2, 6, std::nullopt, 3}); + const Cells f3 = make_cells( + {1, 1, 2, 2, 3, 3, 4, 4}, + {1, 2, 3, 4, 1, 2, 3, 4}, + {-9, -10, -11, -12, std::nullopt, -14, -15, -16}, + {"uno", "dos", "tres", "quatro", "cinco", "seis", "siete", "ocho"}, + {7, 0, 6, std::nullopt, 1, 5, std::nullopt, 2}); + + // fragment 1: base input + write_array(array_name); + write_array(array_name, f2); + write_array(array_name, f3); + + SECTION("WHERE TRUE") { + const Cells expect = templates::query::concat({INPUT, f2, f3}); + const auto result = query_array(array_name, query_order, {"TRUE"}); + CHECK(result == expect); + } + + SECTION("WHERE v < 'fourteen'") { + const Cells expect = make_cells( + {1, 2, 2, 3, 4, 1, 3}, + {4, 1, 4, 3, 3, 2, 1}, + {12, std::nullopt, std::nullopt, 5, 1, -10, std::nullopt}, + {"four", "five", "eight", "eleven", "fifteen", "dos", "cinco"}, + {std::nullopt, 7, 0, 3, 7, 0, 1}); + + const auto result = + query_array(array_name, query_order, {"v < 'fourteen'"}); + CHECK(result == expect); + } + + SECTION("WHERE row + col <= 4") { + const Cells expect = make_cells( + {1, 1, 1, 2, 2, 3, 1, 2, 3, 1, 1, 3}, + {1, 2, 3, 1, 2, 1, 1, 2, 1, 1, 2, 1}, + {15, + std::nullopt, + std::nullopt, + std::nullopt, + 10, + 7, + -1, + std::nullopt, + std::nullopt, + -9, + -10, + std::nullopt}, + {"one", + "two", + "three", + "five", + "six", + "nine", + "ένα", + "τρία", + "πέντε", + "uno", + "dos", + "cinco"}, + {4, 4, 7, 7, 7, 1, 0, 1, 2, 7, 0, 1}); + + 
const auto result = + query_array(array_name, query_order, {"row + col <= 4"}); + CHECK(result == expect); + } + + SECTION("WHERE a IS NULL AND row > col") { + const Cells expect = make_cells( + {2, 4, 3, 4, 3}, + {1, 1, 1, 3, 1}, + {std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + {"five", "thirteen", "πέντε", "οκτώ", "cinco"}, + {7, std::nullopt, 2, 3, 1}); + + const auto result = + query_array(array_name, query_order, {"a IS NULL", "row > col"}); + CHECK(result == expect); + } + + SECTION("WHERE octet_length(v) > char_length(v)") { + const Cells expect = f2; + + const auto result = query_array( + array_name, query_order, {"octet_length(v) > char_length(v)"}); + CHECK(result == expect); + } + + SECTION("WHERE e < 'california'") { + // enumeration not supported yet + REQUIRE_THROWS_WITH( + query_array(array_name, query_order, {"e < 'california'"}), + matchEnumerationNotSupported()); + } +} + +/** + * Test that we do something reasonable when evaluating a predicate + * on an array whose schema evolved to have a different type for the + * same attribute + */ +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate evolved schema", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_evolution"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name, INPUT); + + { + ArraySchemaEvolution(ctx_).drop_attribute("a").array_evolve(array_name); + + ArraySchemaEvolution(ctx_) + .add_attribute(Attribute::create(ctx_, "a")) + .array_evolve(array_name); + } + + using CellsEvolved = templates::Fragment2D< + uint64_t, + uint64_t, + std::string, + std::optional, + std::string>; + + const CellsEvolved f2 = make_cells_generic< + CellsEvolved, + std::string, + std::optional, + std::string>( + {1, 2, 3, 4}, + {1, 2, 3, 4}, + {"seventeen", "eighteen", "nineteen", "twenty"}, + 
{0, 1, 2, 3}, + {"00", "01", "10", "11"}); + write_array(array_name, f2); + + SECTION("WHERE a LIKE '%1'") { + CellsEvolved expect = make_cells_generic< + CellsEvolved, + std::string, + std::optional, + std::string>( + {2, 4}, {2, 4}, {"eighteen", "twenty"}, {1, 3}, {"01", "11"}); + + const auto result = query_array( + array_name, TILEDB_GLOBAL_ORDER, {"a LIKE '%1'"}); + CHECK(result == expect); + } + + SECTION("WHERE a & 1 = 0") { + REQUIRE_THROWS_WITH( + query_array( + array_name, TILEDB_GLOBAL_ORDER, {"a & 1 = 0"}), + Catch::Matchers::ContainsSubstring( + "Error: Error adding predicate: Type coercion error: Error during " + "planning: Cannot infer common type for bitwise operation " + "LargeUtf8 & Int64")); + } +} + +/** + * Test combinations of query conditions and predicates. + * + * While predicates are the more user-friendly option, query conditions are + * still around for historical reasons, and may also be more performant. + * Whatever the case, we don't explicitly disable combining these features, + * and so we should observe here that they mix well and the cells which come out + * of a query using both are the logical AND of both types of predicates. 
+ */ +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate with query condition", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const auto query_order = TILEDB_GLOBAL_ORDER; + + const std::string array_name = vfs_test_setup_.array_uri( + "test_query_add_predicate_with_query_condition"); + + create_array(array_name, TILEDB_SPARSE); + write_array(array_name); + + SECTION("Same") { + QueryArrayKWArgs kwargs; + kwargs.condition.emplace(ctx_); + kwargs.condition.value().init("a", nullptr, 0, TILEDB_EQ); // `a IS NULL` + + const auto qcresult = query_array(array_name, query_order, {}, kwargs); + CHECK(qcresult == expect_a_is_null); + + const auto predresult = query_array(array_name, query_order, {"a IS NULL"}); + CHECK(predresult == expect_a_is_null); + + const auto andresult = + query_array(array_name, query_order, {"a IS NULL"}, kwargs); + CHECK(andresult == expect_a_is_null); + } + + SECTION("Disjoint") { + QueryArrayKWArgs kwargs; + kwargs.condition.emplace(ctx_); + kwargs.condition.value().init("a", nullptr, 0, TILEDB_EQ); // `a IS NULL` + + const auto qcresult = query_array(array_name, query_order, {}, kwargs); + CHECK(qcresult == expect_a_is_null); + + const auto predresult = + query_array(array_name, query_order, {"starts_with(v, 't')"}); + CHECK(predresult == expect_v_starts_with_t); + + const auto andresult = + query_array(array_name, query_order, {"starts_with(v, 't')"}, kwargs); + CHECK(andresult == expect_a_is_null_and_v_starts_with_t); + } + + SECTION("Enumeration in query condition") { + QueryArrayKWArgs kwargs; + kwargs.condition.emplace(ctx_); + kwargs.condition.value().init("e", nullptr, 0, TILEDB_EQ); // `e IS NULL` + + const auto qcresult = query_array(array_name, query_order, {}, kwargs); + CHECK(qcresult == expect_e_is_null); + + const auto predresult = query_array(array_name, query_order, {"a IS NULL"}); + CHECK(predresult == expect_a_is_null); + + 
const auto andresult = + query_array(array_name, query_order, {"a IS NULL"}, kwargs); + CHECK(andresult == expect_a_and_e_are_null); + } + + SECTION("Enumeration in predicate") { + QueryArrayKWArgs kwargs; + kwargs.condition.emplace(ctx_); + kwargs.condition.value().init("a", nullptr, 0, TILEDB_EQ); // `a IS NULL` + + const auto qcresult = query_array(array_name, query_order, {}, kwargs); + CHECK(qcresult == expect_a_is_null); + + REQUIRE_THROWS_WITH( + query_array(array_name, query_order, {"e IS NULL"}), + matchEnumerationNotSupported()); + REQUIRE_THROWS_WITH( + query_array(array_name, query_order, {"e IS NULL"}, kwargs), + matchEnumerationNotSupported()); + } +} + +/** + * Test that field names with special characters can be used by enclosing them + * in quotes + */ +TEST_CASE_METHOD( + QueryAddPredicateFx, + "Query add predicate field name escaping", + "[query][add_predicate]") { + if (!isAddPredicateEnabled) { + SKIP("tiledb_query_add_predicate requires -DTILEDB_RUST=ON"); + } + + const std::string array_name = + vfs_test_setup_.array_uri("test_query_add_predicate_field_name_escape"); + + create_array(array_name, TILEDB_SPARSE); + + // re-name fields to have special characters in them + // (preserve order/types of attributes so we can continue using INPUT) + { + auto enmr = ArrayExperimental::get_enumeration( + ctx_, Array(ctx_, array_name, TILEDB_READ), "us_states"); + + // first drop the old enumeration due to error adding an attribute trying to + // use it: cannot add an attribute using an enumeration which isn't loaded + ArraySchemaEvolution(ctx_) + .drop_attribute("e") + .drop_enumeration("us_states") + .array_evolve(array_name); + + auto evolve = + ArraySchemaEvolution(ctx_) + .drop_attribute("a") + .drop_attribute("v") + .add_attribute( + Attribute::create(ctx_, "'a'").set_nullable(true)) + .add_attribute(Attribute::create(ctx_, "\"v\"")); + + auto e = Attribute::create(ctx_, "e e").set_nullable(true); + AttributeExperimental::set_enumeration_name(ctx_, 
e, "us_states"); + + evolve.add_attribute(e).add_enumeration(enmr); + + evolve.array_evolve(array_name); + } + + write_array(array_name); + + const auto query_order = TILEDB_GLOBAL_ORDER; + + SECTION("WHERE 'a' IS NULL") { + const auto result = + query_array(array_name, query_order, {"\"'a'\" IS NULL"}); + CHECK(result == expect_a_is_null); + } + + SECTION("WHERE starts_with(\"v\", 't')") { + const auto result = query_array( + array_name, query_order, {"starts_with(\"\"\"v\"\"\", 't')"}); + CHECK(result == expect_v_starts_with_t); + } + + SECTION("WHERE \"e e\" IS NULL") { + REQUIRE_THROWS_WITH( + query_array(array_name, query_order, {"\"e e\" IS NULL"}), + matchEnumerationNotSupported("e e")); + } + + SECTION("Query condition rewrite") { + QueryArrayKWArgs kwargs; + kwargs.condition.emplace(ctx_); + kwargs.condition.value().init( + "'a'", nullptr, 0, TILEDB_EQ); // `"'a'" IS NULL` + + const auto qcresult = query_array(array_name, query_order, {}, kwargs); + CHECK(qcresult == expect_a_is_null); + + const std::string pred = "starts_with(\"\"\"v\"\"\", 't')"; + + const auto predresult = query_array(array_name, query_order, {pred}); + CHECK(predresult == expect_v_starts_with_t); + + const auto andresult = query_array(array_name, query_order, {pred}, kwargs); + CHECK(andresult == expect_a_is_null_and_v_starts_with_t); + } +} diff --git a/test/src/unit-sparse-global-order-reader.cc b/test/src/unit-sparse-global-order-reader.cc index d89ccd4da3e..517a16e4104 100644 --- a/test/src/unit-sparse-global-order-reader.cc +++ b/test/src/unit-sparse-global-order-reader.cc @@ -36,6 +36,7 @@ #include "test/support/src/array_templates.h" #include "test/support/src/error_helpers.h" #include "test/support/src/helpers.h" +#include "test/support/src/query_helpers.h" #include "test/support/src/vfs_helpers.h" #include "tiledb/api/c_api/array/array_api_internal.h" #include "tiledb/sm/c_api/tiledb.h" @@ -147,6 +148,7 @@ struct FxRun1D { // for evaluating std::optional> condition; + bool 
condition_use_datafusion = false; DefaultArray1DConfig array; SparseGlobalOrderReaderMemoryBudget memory; @@ -258,6 +260,7 @@ struct FxRun2D { std::optional>>> subarray; std::optional> condition; + bool condition_use_datafusion; size_t num_user_cells; @@ -271,7 +274,8 @@ struct FxRun2D { SparseGlobalOrderReaderMemoryBudget memory; FxRun2D() - : capacity(64) + : condition_use_datafusion(false) + , capacity(64) , allow_dups(true) , tile_order_(TILEDB_ROW_MAJOR) , cell_order_(TILEDB_ROW_MAJOR) { @@ -509,7 +513,8 @@ struct CSparseGlobalOrderFx { template DeleteArrayGuard run_create(Instance& instance); template - void run_execute(Instance& instance); + std::optional run_execute( + Instance& instance); /** * Runs an input against a fresh array. @@ -517,7 +522,7 @@ struct CSparseGlobalOrderFx { * and checks that what we read out matches what we put in. */ template - void run(Instance& instance); + std::optional run(Instance& instance); template std::optional error_if_any(CAPIReturn apirc) const; @@ -3443,11 +3448,12 @@ void CSparseGlobalOrderFx::create_array(const Instance& instance) { * expected result order computed from the input data. 
*/ template -void CSparseGlobalOrderFx::run(Instance& instance) { +std::optional CSparseGlobalOrderFx::run( + Instance& instance) { reset_config(); auto tmparray = run_create(instance); - run_execute(instance); + return run_execute(instance); } template @@ -3474,7 +3480,8 @@ DeleteArrayGuard CSparseGlobalOrderFx::run_create(Instance& instance) { } template -void CSparseGlobalOrderFx::run_execute(Instance& instance) { +std::optional +CSparseGlobalOrderFx::run_execute(Instance& instance) { ASSERTER(instance.num_user_cells > 0); std::decay_t expect; @@ -3495,12 +3502,6 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { expect_fragment.insert(expect_fragment.end(), fragment.size(), f); } else { std::vector accept; - std::optional< - templates::QueryConditionEvalSchema> - eval; - if (instance.condition.has_value()) { - eval.emplace(); - } for (uint64_t i = 0; i < fragment.size(); i++) { if (!instance.pass_subarray(fragment, i)) { continue; @@ -3602,10 +3603,17 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { } if (instance.condition.has_value()) { - tiledb::sm::QueryCondition qc(instance.condition->get()->clone()); - const auto rc = - query->query_->set_condition(qc); // SAFETY: this performs a deep copy - ASSERTER(rc.to_string() == "Ok"); + if (instance.condition_use_datafusion) { + const std::string sql = tiledb::test::to_sql( + *instance.condition.value().get(), + array->array()->array_schema_latest()); + TRY(context(), tiledb_query_add_predicate(context(), query, sql.c_str())); + } else { + tiledb::sm::QueryCondition qc(instance.condition->get()->clone()); + const auto rc = query->query_->set_condition( + qc); // SAFETY: this performs a deep copy + ASSERTER(rc.to_string() == "Ok"); + } } // Prepare output buffer @@ -3655,7 +3663,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { } } tiledb_query_free(&query); - return; + return std::nullopt; } if (err->find("Cannot set array memory budget") != std::string::npos) { if 
(!vfs_test_setup_.is_rest()) { @@ -3668,7 +3676,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { ASSERTER(array_usage > array_budget); } tiledb_query_free(&query); - return; + return std::nullopt; } if constexpr (std::is_same_v) { if (err->find("Cannot allocate space for preprocess result tile ID " @@ -3677,13 +3685,13 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { // we can probably make some assertions about what this should // have looked like but for now we'll let it go tiledb_query_free(&query); - return; + return std::nullopt; } if (err->find("Cannot load tile offsets") != std::string::npos) { // not enough memory budget for tile offsets, don't bother // asserting about it (for now?) tiledb_query_free(&query); - return; + return std::nullopt; } } } @@ -3797,6 +3805,8 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) { ASSERTER(can_complete.has_value()); } } + + return expect; } // rapidcheck generators and Arbitrary specializations @@ -3878,20 +3888,22 @@ struct Arbitrary> { auto num_user_cells = gen::inRange(1, 8 * 1024 * 1024); return gen::apply( - [](auto fragments, int num_user_cells) { + [](auto fragments, int num_user_cells, bool condition_use_datafusion) { FxRun1D instance; instance.array.allow_dups_ = std::get<0>(fragments); instance.array.dimension_ = std::get<1>(fragments); instance.subarray = std::get<2>(fragments); instance.fragments = std::move(std::get<3>(fragments).first); instance.condition = std::move(std::get<3>(fragments).second); + instance.condition_use_datafusion = condition_use_datafusion; instance.num_user_cells = num_user_cells; return instance; }, fragments, - num_user_cells); + num_user_cells, + gen::arbitrary()); } }; @@ -3987,7 +3999,8 @@ struct Arbitrary { [](auto fragments, int num_user_cells, tiledb_layout_t tile_order, - tiledb_layout_t cell_order) { + tiledb_layout_t cell_order, + bool condition_use_datafusion) { FxRun2D instance; instance.allow_dups = std::get<0>(fragments); 
instance.d1 = std::get<1>(fragments); @@ -3995,6 +4008,7 @@ struct Arbitrary { instance.subarray = std::get<3>(fragments); instance.fragments = std::move(std::get<4>(fragments).first); instance.condition = std::move(std::get<4>(fragments).second); + instance.condition_use_datafusion = condition_use_datafusion; // TODO: capacity instance.num_user_cells = num_user_cells; @@ -4006,7 +4020,8 @@ struct Arbitrary { fragments, num_user_cells, tile_order, - cell_order); + cell_order, + gen::arbitrary()); } }; diff --git a/test/support/CMakeLists.txt b/test/support/CMakeLists.txt index caae00aa1a9..2c7bbc6d1c4 100644 --- a/test/support/CMakeLists.txt +++ b/test/support/CMakeLists.txt @@ -47,6 +47,7 @@ set(TILEDB_TEST_SUPPORT_SOURCES src/helpers-dimension.h src/mem_helpers.h src/mem_helpers.cc + src/query_helpers.cc src/serialization_wrappers.cc src/stats.cc src/temporary_local_directory.cc diff --git a/test/support/src/array_templates.h b/test/support/src/array_templates.h index 9dc422bc48a..7b389f3f44b 100644 --- a/test/support/src/array_templates.h +++ b/test/support/src/array_templates.h @@ -36,6 +36,7 @@ #include "tiledb.h" #include "tiledb/common/unreachable.h" +#include "tiledb/sm/array_schema/array_schema.h" #include "tiledb/sm/cpp_api/tiledb" #include "tiledb/sm/query/ast/query_ast.h" #include "tiledb/type/datatype_traits.h" @@ -502,6 +503,18 @@ struct query_buffers> { query_buffers() { } + query_buffers(std::vector> cells) { + for (const auto& cell : cells) { + if (cell.has_value()) { + values_.push_back(cell.value()); + validity_.push_back(1); + } else { + values_.push_back(T()); + validity_.push_back(0); + } + } + } + query_buffers(const self_type& other) = default; bool operator==(const self_type& other) const = default; @@ -1264,6 +1277,26 @@ struct Fragment { } }; +/** + * Specialization of `query_buffers` for variable-length non-nullable cells + * whose physical type is `char` and thus the "logical type" of each cell + * is `std::string`. 
+ * + * See `query_buffers>`. + */ +template <> +struct query_buffers : public query_buffers> { + query_buffers() { + } + + query_buffers(std::vector cells) { + for (const auto& cell : cells) { + offsets_.push_back(values_.size()); + values_.insert(values_.end(), cell.begin(), cell.end()); + } + } +}; + /** * Data for a one-dimensional array */ @@ -1308,6 +1341,8 @@ struct Fragment2D : public Fragment, std::tuple> { template struct Fragment3D : public Fragment, std::tuple> { + using self_type = Fragment3D; + const query_buffers& d1() const { return std::get<0>(this->dimensions()); } @@ -1331,8 +1366,13 @@ struct Fragment3D query_buffers& d3() { return std::get<2>(this->dimensions()); } + + bool operator==(const self_type& other) const = default; }; +template +struct DenseFragment : public Fragment, std::tuple> {}; + /** * Binds variadic field data to a tiledb query */ @@ -1611,6 +1651,29 @@ void set_fields( } } +/** + * Adds the buffers from `fragment` to a query, + * using `schema` to look up field names for the positional fields of `F`. 
+ */ +template +void set_fields( + tiledb_ctx_t* ctx, + tiledb_query_t* query, + fragment_field_sizes_t& field_sizes, + F& fragment, + const tiledb::sm::ArraySchema& schema, + const fragment_field_sizes_t& field_cursors = + fragment_field_sizes_t()) { + std::function dim_name = [&](unsigned dim) { + return schema.domain().dimension_ptr(dim)->name(); + }; + std::function att_name = [&](unsigned att) { + return schema.attribute(att)->name(); + }; + return set_fields( + ctx, query, field_sizes, fragment, dim_name, att_name, field_cursors); +} + /** * @return the number of cells written into `fields` by a read query */ @@ -1621,6 +1684,21 @@ uint64_t num_cells(const F& fragment, const auto& field_sizes) { }(std::tuple_cat(fragment.dimensions(), fragment.attributes())); } +/** + * @return the concatenation of one or more fragments + */ +template +F concat(std::initializer_list fragments) { + F concat; + auto d = concat.dimensions(); + auto a = concat.attributes(); + for (const F& fragment : fragments) { + stdx::extend(d, fragment.dimensions()); + stdx::extend(a, fragment.attributes()); + } + return concat; +} + /** * Writes a fragment to a sparse array. */ diff --git a/test/support/src/query_helpers.cc b/test/support/src/query_helpers.cc new file mode 100644 index 00000000000..99170bc5216 --- /dev/null +++ b/test/support/src/query_helpers.cc @@ -0,0 +1,108 @@ +/** + * @file query_helpers.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + * + * Test-support helpers which render query conditions as SQL predicate text. + */ +#include "test/support/src/query_helpers.h" +#include "tiledb/stdx/utility/to_underlying.h" +#include "tiledb/type/apply_with_type.h" + +#include <sstream> + +namespace tiledb::test { + +using namespace tiledb::sm; + +/** + * @return a SQL representation of a `QueryConditionOp` + * @throws std::logic_error if `op` is not one of the comparison operators + * handled below + */ +static const char* to_sql_op(QueryConditionOp op) { + switch (op) { + case QueryConditionOp::LT: + return "<"; + case QueryConditionOp::LE: + return "<="; + case QueryConditionOp::EQ: + return "="; + case QueryConditionOp::GE: + return ">="; + case QueryConditionOp::GT: + return ">"; + case QueryConditionOp::NE: + return "<>"; + default: + throw std::logic_error( + "Invalid query condition op: " + + std::to_string(stdx::to_underlying(op))); + } +} + +/** + * Renders a query condition syntax tree as SQL predicate text. + * + * A value node is rendered as `field op value`, where the value bytes are + * interpreted using the datatype which `schema` reports for the field. + * An expression node renders each child in parentheses, separated by the + * string form of its combination op; `NOT` is rendered as a prefix. + * + * @param ast the root of the syntax tree to render + * @param schema used to look up the datatype of each field + * @return a SQL representation of the query condition syntax tree + * @throws std::logic_error if a node is neither a value node nor an + * expression node, or if a value node has an unsupported op + */ +std::string to_sql(const ASTNode& ast, const ArraySchema& schema) { + // Both casts must be `dynamic_cast`: the branches below rely on a null + // result to rule out a node kind. A `static_cast` never yields null here, + // which would send expression nodes down the value-node branch. + const ASTNodeVal* valnode = dynamic_cast<const ASTNodeVal*>(&ast); + const ASTNodeExpr* exprnode = dynamic_cast<const ASTNodeExpr*>(&ast); + + std::stringstream os; + if (valnode) { + const auto fname = valnode->get_field_name(); + const auto op = valnode->get_op(); + const auto bytes = valnode->get_data(); + + std::stringstream value; + + // NOTE(review): reads exactly one `T` from the value buffer; presumably + // null tests (empty buffer) and var-sized values never reach this + // helper - confirm against callers. + apply_with_type( + [&](auto t) { + using T = decltype(t); + value << *reinterpret_cast<const T*>(bytes.data()); + }, + schema.type(fname)); + + os << fname << " " << to_sql_op(op) << " " << value.str(); + } else if (exprnode) { + const auto op = exprnode->get_combination_op(); + const auto& children = exprnode->get_children(); + if (op == QueryConditionCombinationOp::NOT) { + assert(children.size() == 1); + os << "NOT "; + } + for (unsigned i = 0; i < children.size(); i++) { + if (i != 0) { + os << " " << query_condition_combination_op_str(op) << " "; + } + os << "(" << to_sql(*children[i].get(), schema) << ")"; + } + } else { + throw std::logic_error( + "Invalid query condition syntax tree node: " + + std::string(typeid(ast).name())); + } + return os.str(); +} + +} // namespace 
tiledb::test diff --git a/test/support/src/query_helpers.h b/test/support/src/query_helpers.h new file mode 100644 index 00000000000..ae43767124b --- /dev/null +++ b/test/support/src/query_helpers.h @@ -0,0 +1,50 @@ +/** + * @file test/support/src/query_helpers.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2025 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + * @section DESCRIPTION + */ + +#ifndef TILEDB_TEST_SUPPORT_QUERY_CONDITION_H +#define TILEDB_TEST_SUPPORT_QUERY_CONDITION_H + +#include "tiledb/sm/array_schema/array_schema.h" +#include "tiledb/sm/query/ast/query_ast.h" + +#include + +namespace tiledb::test { + +/** + * @return a SQL representation of the query condition syntax tree + */ +std::string to_sql( + const tiledb::sm::ASTNode& ast, + const tiledb::sm::ArraySchema& array_schema); + +} // namespace tiledb::test + +#endif diff --git a/test/support/src/vfs_helpers.h b/test/support/src/vfs_helpers.h index 7d9fe2053b8..44ccf105927 100644 --- a/test/support/src/vfs_helpers.h +++ b/test/support/src/vfs_helpers.h @@ -901,7 +901,7 @@ struct VFSTestSetup { return "tiledb://unit-workspace/unit-teamspace/" + backend + name + label; } - Context ctx() { + Context ctx() const { return Context(ctx_c, false); } diff --git a/tiledb/oxidize/CMakeLists.txt b/tiledb/oxidize/CMakeLists.txt index d63a0ae0d25..0d94656ac25 100644 --- a/tiledb/oxidize/CMakeLists.txt +++ b/tiledb/oxidize/CMakeLists.txt @@ -21,13 +21,6 @@ cxxbridge( lib.rs ) -cxxbridge( - NAME - expr - SOURCES - lib.rs -) - cxxbridge( NAME cxx-interface @@ -35,12 +28,20 @@ cxxbridge( array_schema.cc common/memory_tracker.rs sm/array_schema/mod.rs + sm/buffer.rs sm/enums/mod.rs sm/query/readers/mod.rs sm/query/ast/mod.rs sm/misc/mod.rs sm/tile/mod.rs -) +) + +cxxbridge( + NAME + query-predicates + SOURCES + lib.rs +) cxxbridge( NAME @@ -60,7 +61,7 @@ oxidize( EXPORT arrow cxx-interface - expr + query-predicates ) oxidize( @@ -78,7 +79,7 @@ oxidize( EXPORT arrow cxx-interface - expr + query-predicates test-support-cxx-interface ) diff --git a/tiledb/oxidize/Cargo.lock b/tiledb/oxidize/Cargo.lock index 65caa8f6d35..ac0c25b3733 100644 --- a/tiledb/oxidize/Cargo.lock +++ b/tiledb/oxidize/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. 
version = 4 -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -25,7 +16,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -33,9 +24,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -61,12 +52,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -78,15 +63,24 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] [[package]] name = "arrayref" @@ -102,9 +96,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" dependencies = [ "arrow-arith", "arrow-array", @@ -123,9 +117,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" dependencies = [ "arrow-array", "arrow-buffer", @@ -137,9 +131,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash", "arrow-buffer", @@ -148,15 +142,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "num", ] [[package]] name = "arrow-buffer" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", @@ -165,9 +159,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "55.1.0" +version = "56.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" dependencies = [ "arrow-array", "arrow-buffer", @@ -186,9 +180,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" dependencies = [ "arrow-array", "arrow-cast", @@ -196,15 +190,14 @@ dependencies = [ "chrono", "csv", "csv-core", - "lazy_static", "regex", ] [[package]] name = "arrow-data" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -214,23 +207,25 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", + "arrow-select", "flatbuffers", "lz4_flex", + "zstd", ] [[package]] name = "arrow-json" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" dependencies = [ "arrow-array", "arrow-buffer", @@ -250,9 +245,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "55.1.0" +version = "56.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" dependencies = [ "arrow-array", "arrow-buffer", @@ -263,9 +258,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" dependencies = [ "arrow-array", "arrow-buffer", @@ -276,15 +271,19 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +dependencies = [ + "serde", + "serde_json", +] [[package]] name = "arrow-select" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash", "arrow-array", @@ -296,9 +295,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" dependencies = [ "arrow-array", "arrow-buffer", @@ -317,7 +316,7 @@ version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "bzip2", + "bzip2 0.5.2", "flate2", 
"futures-core", "memchr", @@ -330,9 +329,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", @@ -354,21 +353,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - [[package]] name = "base64" version = "0.22.1" @@ -377,9 +361,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -405,9 +389,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "blake2" @@ -442,9 +426,9 @@ dependencies = [ [[package]] name = "brotli" -version = "8.0.1" +version = "8.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -463,9 +447,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.18.1" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" @@ -488,6 +472,15 @@ dependencies = [ "bzip2-sys", ] +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + [[package]] name = "bzip2-sys" version = "0.1.13+1.0.8" @@ -500,10 +493,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.27" +version = "1.2.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -512,7 +506,7 @@ dependencies = [ [[package]] name = "cells" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "paste", "proptest", @@ -524,17 +518,16 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" 
-version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "num-traits", "windows-link", @@ -542,39 +535,28 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ "chrono", - "chrono-tz-build", "phf", ] -[[package]] -name = "chrono-tz-build" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" -dependencies = [ - "parse-zoneinfo", - "phf_codegen", -] - [[package]] name = "clap" -version = "4.5.40" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.40" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" dependencies = [ "anstyle", "clap_lex", @@ -583,39 +565,30 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" - -[[package]] -name = "codespan-reporting" -version = "0.11.1" +version = "0.7.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width 0.1.14", -] +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "codespan-reporting" -version = "0.12.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width 0.2.1", + "unicode-width", ] [[package]] name = "comfy-table" -version = "7.1.4" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "unicode-segmentation", - "unicode-width 0.2.1", + "strum", + "strum_macros", + "unicode-width", ] [[package]] @@ -661,9 +634,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -676,15 +649,15 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -692,47 +665,49 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] [[package]] name = "cxx" -version = "1.0.138" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3956d60afa98653c5a57f60d7056edd513bfe0307ef6fb06f6167400c3884459" +checksum = "47ac4eaf7ebe29e92f1b091ceefec7710a53a6f6154b2460afda626c113b65b9" dependencies = [ "cc", + "cxx-build", "cxxbridge-cmd", "cxxbridge-flags", "cxxbridge-macro", - "foldhash", + "foldhash 0.2.0", "link-cplusplus", ] [[package]] name = "cxx-build" -version = "1.0.158" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36a8232661d66dcf713394726157d3cfe0a89bfc85f52d6e9f9bbc2306797fe7" +checksum = "2abd4c3021eefbac5149f994c117b426852bca3a0aad227698527bca6d4ea657" dependencies = [ "cc", - "codespan-reporting 0.12.0", + "codespan-reporting", + "indexmap", "proc-macro2", "quote", "scratch", @@ -741,12 +716,13 @@ dependencies = [ [[package]] name = "cxxbridge-cmd" -version = "1.0.138" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0f01e92ab4ce9fd4d16e3bb11b158d98cbdcca803c1417aa43130a6526fbf208" +checksum = "6f12fbc5888b2311f23e52a601e11ad7790d8f0dbb903ec26e2513bf5373ed70" dependencies = [ "clap", - "codespan-reporting 0.11.1", + "codespan-reporting", + "indexmap", "proc-macro2", "quote", "syn", @@ -754,19 +730,19 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.138" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41cbfab344869e70998b388923f7d1266588f56c8ca284abf259b1c1ffc695" +checksum = "83d3dd7870af06e283f3f8ce0418019c96171c9ce122cfb9c8879de3d84388fd" [[package]] name = "cxxbridge-macro" -version = "1.0.138" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d82a2f759f0ad3eae43b96604efd42b1d4729a35a6f2dc7bdb797ae25d9284" +checksum = "a26f0d82da663316786791c3d0e9f9edc7d1ee1f04bdad3d2643086a69d6256c" dependencies = [ + "indexmap", "proc-macro2", "quote", - "rustversion", "syn", ] @@ -786,16 +762,16 @@ dependencies = [ [[package]] name = "datafusion" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe060b978f74ab446be722adb8a274e052e005bf6dfd171caadc3abaad10080" +checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -813,9 +789,9 @@ dependencies = [ "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", - "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", @@ -828,7 +804,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand", + "rand 0.9.2", "regex", "sqlparser", "tempfile", @@ -841,9 +817,9 @@ 
dependencies = [ [[package]] name = "datafusion-catalog" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fe34f401bd03724a1f96d12108144f8cd495a3cdda2bf5e091822fb80b7e66" +checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" dependencies = [ "arrow", "async-trait", @@ -867,9 +843,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4411b8e3bce5e0fc7521e44f201def2e2d5d1b5f176fb56e8cdc9942c890f00" +checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" dependencies = [ "arrow", "async-trait", @@ -890,14 +866,15 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734015d81c8375eb5d4869b7f7ecccc2ee8d6cb81948ef737cd0e7b743bd69c" +checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" dependencies = [ "ahash", "arrow", "arrow-ipc", "base64", + "chrono", "half", "hashbrown 0.14.5", "indexmap", @@ -914,9 +891,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5167bb1d2ccbb87c6bc36c295274d7a0519b14afcfdaf401d53cbcaa4ef4968b" +checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" dependencies = [ "futures", "log", @@ -925,21 +902,22 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04e602dcdf2f50c2abf297cc2203c73531e6f48b29516af7695d338cf2a778b1" +checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", 
- "bzip2", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -950,7 +928,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand", + "rand 0.9.2", "tempfile", "tokio", "tokio-util", @@ -961,9 +939,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bb2253952dc32296ed5b84077cb2e0257fea4be6373e1c376426e17ead4ef6" +checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" dependencies = [ "arrow", "async-trait", @@ -986,9 +964,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8c7f47a5d2fe03bfa521ec9bafdb8a5c82de8377f60967c3663f00c8790352" +checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" dependencies = [ "arrow", "async-trait", @@ -1011,9 +989,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d15868ea39ed2dc266728b554f6304acd473de2142281ecfa1294bb7415923" +checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" dependencies = [ "arrow", "async-trait", @@ -1026,9 +1004,11 @@ dependencies = [ "datafusion-expr", "datafusion-functions-aggregate", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", + "datafusion-pruning", "datafusion-session", "futures", "itertools", @@ -1036,23 +1016,24 @@ dependencies = [ "object_store", "parking_lot", "parquet", - 
"rand", + "rand 0.9.2", "tokio", ] [[package]] name = "datafusion-doc" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91f8c2c5788ef32f48ff56c68e5b545527b744822a284373ac79bba1ba47292" +checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" [[package]] name = "datafusion-execution" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06f004d100f49a3658c9da6fb0c3a9b760062d96cd4ad82ccc3b7b69a9fb2f84" +checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" dependencies = [ "arrow", + "async-trait", "dashmap", "datafusion-common", "datafusion-expr", @@ -1060,18 +1041,19 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand", + "rand 0.9.2", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a4e4ce3802609be38eeb607ee72f6fe86c3091460de9dbfae9e18db423b3964" +checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" dependencies = [ "arrow", + "async-trait", "chrono", "datafusion-common", "datafusion-doc", @@ -1088,9 +1070,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ac9cf3b22bbbae8cdf8ceb33039107fde1b5492693168f13bd566b1bcc839" +checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" dependencies = [ "arrow", "datafusion-common", @@ -1101,9 +1083,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ddf0a0a2db5d2918349c978d42d80926c6aa2459cd8a3c533a84ec4bb63479e" +checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" 
dependencies = [ "arrow", "arrow-buffer", @@ -1121,7 +1103,7 @@ dependencies = [ "itertools", "log", "md-5", - "rand", + "rand 0.9.2", "regex", "sha2", "unicode-segmentation", @@ -1130,9 +1112,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "408a05dafdc70d05a38a29005b8b15e21b0238734dab1e98483fcb58038c5aba" +checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" dependencies = [ "ahash", "arrow", @@ -1151,9 +1133,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "756d21da2dd6c9bef97af1504970ff56cbf35d03fbd4ffd62827f02f4d2279d4" +checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" dependencies = [ "ahash", "arrow", @@ -1164,9 +1146,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d8d50f6334b378930d992d801a10ac5b3e93b846b39e4a05085742572844537" +checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" dependencies = [ "arrow", "arrow-ord", @@ -1176,6 +1158,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", "itertools", @@ -1185,9 +1168,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9a97220736c8fff1446e936be90d57216c06f28969f9ffd3b72ac93c958c8a" +checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" dependencies = [ "arrow", "async-trait", @@ -1201,10 +1184,11 @@ 
dependencies = [ [[package]] name = "datafusion-functions-window" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefc2d77646e1aadd1d6a9c40088937aedec04e68c5f0465939912e1291f8193" +checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" dependencies = [ + "arrow", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1218,9 +1202,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd4aff082c42fa6da99ce0698c85addd5252928c908eb087ca3cfa64ff16b313" +checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1228,9 +1212,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df6f88d7ee27daf8b108ba910f9015176b36fbc72902b1ca5c2a5f1d1717e1a1" +checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" dependencies = [ "datafusion-expr", "quote", @@ -1239,14 +1223,15 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084d9f979c4b155346d3c34b18f4256e6904ded508e9554d90fed416415c3515" +checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" dependencies = [ "arrow", "chrono", "datafusion-common", "datafusion-expr", + "datafusion-expr-common", "datafusion-physical-expr", "indexmap", "itertools", @@ -1258,9 +1243,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"64c536062b0076f4e30084065d805f389f9fe38af0ca75bcbac86bc5e9fbab65" +checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" dependencies = [ "ahash", "arrow", @@ -1274,15 +1259,31 @@ dependencies = [ "indexmap", "itertools", "log", + "parking_lot", "paste", "petgraph", ] +[[package]] +name = "datafusion-physical-expr-adapter" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "itertools", +] + [[package]] name = "datafusion-physical-expr-common" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a92b53b3193fac1916a1c5b8e3f4347c526f6822e56b71faa5fb372327a863" +checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" dependencies = [ "ahash", "arrow", @@ -1294,9 +1295,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa0a5ac94c7cf3da97bedabd69d6bbca12aef84b9b37e6e9e8c25286511b5e2" +checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" dependencies = [ "arrow", "datafusion-common", @@ -1306,6 +1307,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "datafusion-pruning", "itertools", "log", "recursive", @@ -1313,9 +1315,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "690c615db468c2e5fe5085b232d8b1c088299a6c63d87fd960a354a71f7acb55" +checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" dependencies 
= [ "ahash", "arrow", @@ -1327,6 +1329,7 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1341,11 +1344,29 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-pruning" +version = "50.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools", + "log", +] + [[package]] name = "datafusion-session" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad229a134c7406c057ece00c8743c0c34b97f4e72f78b475fe17b66c5e14fa4f" +checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" dependencies = [ "arrow", "async-trait", @@ -1367,9 +1388,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "47.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f6ab28b72b664c21a27b22a2ff815fd390ed224c26e89a93b5a8154a4e8607" +checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" dependencies = [ "arrow", "bigdecimal", @@ -1418,12 +1439,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1432,6 
+1453,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1440,9 +1467,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ "bitflags", "rustc_version", @@ -1450,9 +1477,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -1477,11 +1504,17 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -1592,43 +1625,40 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", + "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasip2", ] -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "half" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "num-traits", + "zerocopy", ] [[package]] @@ -1643,9 +1673,24 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.4" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +checksum = 
"5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hex" @@ -1666,15 +1711,15 @@ dependencies = [ [[package]] name = "humantime" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1696,9 +1741,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -1709,9 +1754,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -1722,11 +1767,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -1737,42 +1781,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -1782,9 +1822,9 @@ dependencies = [ [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" 
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -1803,12 +1843,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", - "hashbrown 0.15.4", + "hashbrown 0.16.0", ] [[package]] @@ -1838,19 +1878,19 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jobserver" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", @@ -1864,9 +1904,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lexical-core" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -1877,60 +1917,59 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" dependencies = [ "lexical-parse-integer", "lexical-util", - "static_assertions", ] [[package]] name = "lexical-parse-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] name = "lexical-util" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" -dependencies = [ - "static_assertions", -] +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" [[package]] name = "lexical-write-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" dependencies = [ "lexical-util", "lexical-write-integer", - "static_assertions", ] [[package]] name = "lexical-write-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" dependencies = [ "lexical-util", - "static_assertions", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + [[package]] name = "libc" -version = "0.2.174" +version = "0.2.177" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libm" @@ -1940,49 +1979,48 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libz-rs-sys" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" dependencies = [ "zlib-rs", ] [[package]] name = "link-cplusplus" -version = "1.0.10" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a6f6da007f968f9def0d65a05b187e2960183de70c160204ecfccf0ee330212" +checksum = "7f78c730aaa7d0b9336a299029ea49f9ee53b0ed06e9202e8cb7db9bae7b8c82" dependencies = [ "cc", ] [[package]] name = "linux-raw-sys" -version = "0.9.4" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "lz4_flex" @@ -2016,9 +2054,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "miniz_oxide" @@ -2027,6 +2065,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -2105,18 +2144,18 @@ dependencies = [ [[package]] name = "object" -version = "0.36.7" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.12.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781f96d79ed0f961a7021424ab01840efbda64ae7a505aaea195efc91eaaec4" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", "bytes", @@ -2127,7 +2166,7 @@ dependencies = [ "itertools", "parking_lot", "percent-encoding", - "thiserror 2.0.12", + "thiserror 2.0.17", "tokio", "tracing", "url", @@ -2153,9 +2192,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = 
"93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -2163,22 +2202,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "parquet" -version = "55.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" dependencies = [ "ahash", "arrow-array", @@ -2195,12 +2234,13 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "lz4_flex", "num", "num-bigint", "object_store", "paste", + "ring", "seq-macro", "simdutf8", "snap", @@ -2210,15 +2250,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "parse-zoneinfo" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" -dependencies = [ - "regex", -] - [[package]] name = "paste" version = "1.0.15" @@ -2227,54 +2258,36 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", + "hashbrown 0.15.5", "indexmap", + "serde", ] [[package]] name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" dependencies = [ "phf_shared", - "rand", ] [[package]] name = "phf_shared" -version = "0.11.3" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" dependencies = [ "siphasher", ] @@ -2299,9 +2312,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -2317,9 +2330,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -2335,8 +2348,8 @@ dependencies = [ "bitflags", "lazy_static", "num-traits", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.1", "rand_xorshift", "regex-syntax", "rusty-fork", @@ -2346,10 +2359,11 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" dependencies = [ + "ar_archive_writer", "cc", ] @@ -2361,9 +2375,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.40" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -2381,8 +2395,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", ] [[package]] @@ -2392,7 +2416,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = 
"0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -2404,13 +2438,22 @@ dependencies = [ "getrandom 0.2.16", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "rand_xorshift" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -2435,18 +2478,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.13" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.11.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -2456,9 +2499,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -2467,15 +2510,23 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] -name = "rustc-demangle" -version = "0.1.25" +name = "ring" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] [[package]] name = "rustc_version" @@ -2488,28 +2539,28 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.7" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "rustversion" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty-fork" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" dependencies = [ "fnv", "quick-error", @@ -2540,15 +2591,15 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scratch" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9f6280af86e5f559536da57a45ebc84948833b3bee313a7dd25232e09c878a52" +checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2" [[package]] name = "semver" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "seq-macro" @@ -2558,18 +2609,28 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -2578,14 +2639,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.140" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -2605,6 +2667,12 @@ version = "1.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simdutf8" version = "0.1.5" @@ -2637,9 +2705,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "sqlparser" -version = "0.55.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" +checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" dependencies = [ "log", "recursive", @@ -2659,15 +2727,15 @@ dependencies = [ [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -2676,16 +2744,10 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strategy-ext" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = 
"git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "num-traits", "proptest", @@ -2697,6 +2759,25 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "subtle" version = "2.6.1" @@ -2705,9 +2786,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.104" +version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -2727,15 +2808,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.20.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2758,11 +2839,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.12" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl 2.0.12", + "thiserror-impl 2.0.17", ] [[package]] @@ -2778,9 +2859,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.12" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", @@ -2818,14 +2899,14 @@ dependencies = [ "iterator-ext", "itertools", "num-traits", - "thiserror 2.0.12", + "thiserror 2.0.17", "tiledb-cxx-interface", ] [[package]] name = "tiledb-common" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "anyhow", "num-traits", @@ -2842,6 +2923,7 @@ version = "0.1.0" dependencies = [ "tiledb-arrow", "tiledb-expr", + "tiledb-query-predicates", ] [[package]] @@ -2864,22 +2946,14 @@ dependencies = [ name = "tiledb-expr" version = "0.1.0" dependencies = [ - "anyhow", - "cxx", - "cxx-build", "datafusion", - "itertools", - "num-traits", - "thiserror 2.0.12", "tiledb-arrow", - "tiledb-cxx-interface", - "tiledb-datatype", ] [[package]] name = "tiledb-pod" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "itertools", "num-traits", @@ -2895,7 +2969,7 @@ dependencies = [ [[package]] name = "tiledb-proc-macro" version = "0.1.0" -source = 
"git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "proc-macro2", "quote", @@ -2905,12 +2979,29 @@ dependencies = [ [[package]] name = "tiledb-proptest-config" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" + +[[package]] +name = "tiledb-query-predicates" +version = "0.1.0" +dependencies = [ + "anyhow", + "arrow", + "cxx", + "cxx-build", + "datafusion", + "itertools", + "num-traits", + "thiserror 2.0.17", + "tiledb-arrow", + "tiledb-cxx-interface", + "tiledb-expr", +] [[package]] name = "tiledb-sys-defs" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" [[package]] name = "tiledb-test-array-schema" @@ -2938,8 +3029,14 @@ version = "0.1.0" dependencies = [ "anyhow", "cxx", + "datafusion", + "itertools", + "num-traits", + "thiserror 2.0.17", + "tiledb-arrow", "tiledb-common", "tiledb-cxx-interface", + "tiledb-datatype", "tiledb-test-support-cxx-interface", ] @@ -2987,6 +3084,7 @@ dependencies = [ "tiledb-cxx-interface", "tiledb-expr", "tiledb-pod", + "tiledb-query-predicates", "tiledb-test-array-schema", "tiledb-test-ffi", "tiledb-test-query-condition", @@ -2996,7 +3094,7 @@ dependencies = [ [[package]] name = "tiledb-utils" version = "0.1.0" -source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#e418936fff551dd608e2a1b5e3c557f4c8e5d29d" +source = "git+https://github.com/TileDB-Inc/tiledb-rs.git?branch=main#1dafdf310ed2e8f4e314a40dff9f3ff46a22c64d" dependencies = [ "float_next_after", 
"num-traits", @@ -3014,9 +3112,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -3024,11 +3122,10 @@ dependencies = [ [[package]] name = "tokio" -version = "1.45.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", "pin-project-lite", "tokio-macros", @@ -3036,9 +3133,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", @@ -3047,9 +3144,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.15" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -3091,15 +3188,15 @@ dependencies = [ [[package]] name = "twox-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unarray" @@ -3109,9 +3206,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-segmentation" @@ -3121,25 +3218,26 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] -name = "unicode-width" -version = "0.2.1" +name = "untrusted" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -3150,11 +3248,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", "wasm-bindgen", ] @@ -3191,45 +3289,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" dependencies = [ "cfg-if", "js-sys", @@ -3240,9 +3325,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = 
"0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3250,31 +3335,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", @@ -3292,18 +3377,18 @@ dependencies = [ [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", @@ -3314,9 +3399,9 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", @@ -3325,9 +3410,9 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", @@ -3336,76 +3421,69 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.3" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ "windows-link", ] [[package]] name = 
"windows-sys" -version = "0.59.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] name = "windows-sys" -version = "0.60.2" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets 0.53.2", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.52.6" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows-link", ] [[package]] name = "windows-targets" -version = "0.53.2" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + 
"windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] @@ -3414,84 +3492,42 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = 
"windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -3499,25 +3535,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "xz2" @@ -3530,11 +3557,10 @@ dependencies = [ [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -3542,9 +3568,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -3554,18 +3580,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", @@ -3595,9 +3621,9 @@ dependencies = [ [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" 
dependencies = [ "displaydoc", "yoke", @@ -3606,9 +3632,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.2" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -3617,9 +3643,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", @@ -3628,9 +3654,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" +checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" [[package]] name = "zstd" @@ -3652,9 +3678,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/tiledb/oxidize/Cargo.toml b/tiledb/oxidize/Cargo.toml index 9647dd66822..e30351be48b 100644 --- a/tiledb/oxidize/Cargo.toml +++ b/tiledb/oxidize/Cargo.toml @@ -10,6 +10,7 @@ members = [ "staticlibs/core-objects", "staticlibs/unit-arithmetic", "staticlibs/unit-query-condition", + "query-predicates", "test-support/array-schema", "test-support/cxx-interface", "test-support/ffi", @@ -23,10 +24,10 @@ version = "0.1.0" [workspace.dependencies] anyhow = "1" 
-arrow = { version = "55" } -cxx = "=1.0.138" -cxx-build = "1.0.138" -datafusion = { version = "47", features = [] } +arrow = { version = "56" } +cxx = "1" +cxx-build = "1" +datafusion = { version = "50", features = [] } iterator-ext = { path = "iterator-ext" } itertools = { version = "0.14" } num-traits = "0.2" @@ -38,6 +39,7 @@ tiledb-cxx-interface = { path = "cxx-interface" } tiledb-datatype = { path = "datatype" } tiledb-expr = { path = "expr" } tiledb-pod = { git = "https://github.com/TileDB-Inc/tiledb-rs.git", branch = "main", features = [ "proptest-strategies" ] } +tiledb-query-predicates = { path = "query-predicates" } tiledb-test-array-schema = { path = "test-support/array-schema" } tiledb-test-cells = { package = "cells", git = "https://github.com/TileDB-Inc/tiledb-rs.git", branch = "main", features = [ "proptest-strategies" ] } tiledb-test-ffi = { path = "test-support/ffi" } diff --git a/tiledb/oxidize/arrow/src/enumeration.rs b/tiledb/oxidize/arrow/src/enumeration.rs new file mode 100644 index 00000000000..808e74fafe9 --- /dev/null +++ b/tiledb/oxidize/arrow/src/enumeration.rs @@ -0,0 +1,47 @@ +use std::sync::Arc; + +use arrow::array::Array as ArrowArray; +use arrow::datatypes::Field as ArrowField; + +use tiledb_cxx_interface::sm::array_schema::Enumeration; + +use crate::{record_batch, schema}; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Enumeration data type error: {0}")] + DataType(#[from] crate::schema::FieldError), + #[error("Enumeration variants error: {0}")] + Variants(#[from] crate::record_batch::FieldError), +} + +/// Returns an [ArrowArray] whose elements are the variants of an [Enumeration]. +/// +/// # Safety +/// +/// When possible this function avoids copying data. This means that the +/// returned [ArrowArray] may reference data which lives inside the [Enumeration]. +/// It is not safe to use the value returned from this function after +/// the [Enumeration] is destructed. 
The caller must take care to abide this +/// requirement. Otherwise this function is safe to use. +pub unsafe fn array_from_enumeration( + enumeration: &Enumeration, +) -> Result, Error> { + let field = { + let adt = schema::arrow_datatype(enumeration.datatype(), enumeration.cell_val_num())?; + ArrowField::new("unused", adt, false) + }; + + if let Some(offsets) = enumeration.offsets() { + let (_, offsets, _) = unsafe { + // SAFETY: just a transmutes u64 to u8 which always succeeds + // with no possible alignment issues + offsets.align_to::() + }; + Ok(unsafe { + record_batch::to_arrow_array(&field, offsets, Some(enumeration.data()), None) + }?) + } else { + Ok(unsafe { record_batch::to_arrow_array(&field, enumeration.data(), None, None) }?) + } +} diff --git a/tiledb/oxidize/arrow/src/lib.rs b/tiledb/oxidize/arrow/src/lib.rs index 79f7dbc2477..13c9784778e 100644 --- a/tiledb/oxidize/arrow/src/lib.rs +++ b/tiledb/oxidize/arrow/src/lib.rs @@ -1,50 +1,16 @@ #[cxx::bridge] pub mod ffi { - #[namespace = "tiledb::sm"] - extern "C++" { - include!("tiledb/sm/array_schema/array_schema.h"); - include!("tiledb/sm/query/readers/result_tile.h"); - - type ArraySchema = tiledb_cxx_interface::sm::array_schema::ArraySchema; - type ResultTile = tiledb_cxx_interface::sm::query::readers::ResultTile; - } - + /// Indicates how an [ArraySchema] should be translated into an Arrow [Schema]. + /// + /// See `schema` module documentation. 
#[namespace = "tiledb::oxidize::arrow::schema"] - extern "Rust" { - type ArrowSchema; - - #[cxx_name = "create"] - fn array_schema_to_arrow_schema( - schema: &ArraySchema, - select: &CxxVector, - ) -> Result>; - } - - #[namespace = "tiledb::oxidize::arrow::record_batch"] - extern "Rust" { - type ArrowRecordBatch; - - #[cxx_name = "create"] - unsafe fn result_tile_to_record_batch( - schema: &ArrowSchema, - tile: &ResultTile, - ) -> Result>; + enum WhichSchema { + Storage, + View, } } +pub mod enumeration; pub mod offsets; pub mod record_batch; pub mod schema; - -use record_batch::{ArrowRecordBatch, to_record_batch as result_tile_to_record_batch}; -use schema::{ArrowSchema, cxx::to_arrow as array_schema_to_arrow_schema}; - -unsafe impl cxx::ExternType for ArrowRecordBatch { - type Id = cxx::type_id!("tiledb::oxidize::arrow::record_batch::ArrowRecordBatch"); - type Kind = cxx::kind::Opaque; -} - -unsafe impl cxx::ExternType for ArrowSchema { - type Id = cxx::type_id!("tiledb::oxidize::arrow::schema::ArrowSchema"); - type Kind = cxx::kind::Opaque; -} diff --git a/tiledb/oxidize/arrow/src/record_batch.rs b/tiledb/oxidize/arrow/src/record_batch.rs index 8ce7720e73c..584afe4518e 100644 --- a/tiledb/oxidize/arrow/src/record_batch.rs +++ b/tiledb/oxidize/arrow/src/record_batch.rs @@ -6,15 +6,14 @@ use std::sync::Arc; use arrow::array::{ - self as aa, Array as ArrowArray, FixedSizeListArray, GenericListArray, PrimitiveArray, + self as aa, Array as ArrowArray, FixedSizeListArray, GenericListArray, LargeStringArray, + PrimitiveArray, }; use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer}; -use arrow::datatypes::{self as adt, ArrowPrimitiveType, Field}; -use arrow::record_batch::RecordBatch; +use arrow::datatypes::{self as adt, ArrowPrimitiveType, Field, Schema as ArrowSchema}; +use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use tiledb_cxx_interface::sm::query::readers::{ResultTile, TileTuple}; -use tiledb_cxx_interface::sm::tile::Tile; -use 
super::*; use crate::offsets::Error as OffsetsError; /// An error creating a [RecordBatch] to represent a [ResultTile]. @@ -39,11 +38,10 @@ pub enum FieldError { InternalUnalignedValues, #[error("Internal error: invalid variable-length data offsets: {0}")] InternalOffsets(#[from] OffsetsError), -} - -/// Wraps a [RecordBatch] for passing across the FFI boundary. -pub struct ArrowRecordBatch { - pub arrow: RecordBatch, + #[error("Error reading tile: {0}")] + InvalidTileData(#[source] arrow::error::ArrowError), + #[error("Attributes with enumerations are not supported in text predicates")] + EnumerationNotSupported, } /// Returns a [RecordBatch] which contains the same contents as a [ResultTile]. @@ -56,11 +54,10 @@ pub struct ArrowRecordBatch { /// long as the returned [RecordBatch] is not used after the [ResultTile] /// is destructed. pub unsafe fn to_record_batch( - schema: &ArrowSchema, + schema: &Arc, tile: &ResultTile, -) -> Result, Error> { +) -> Result { let columns = schema - .0 .fields() .iter() .map(|f| { @@ -87,24 +84,18 @@ pub unsafe fn to_record_batch( .collect::>, _>>()?; // SAFETY: should be clear from iteration - assert_eq!(schema.0.fields().len(), columns.len()); + assert_eq!(schema.fields().len(), columns.len()); // SAFETY: `tile_to_arrow_array` must do this, major internal error if not // which is not recoverable assert!( schema - .0 .fields() .iter() .zip(columns.iter()) .all(|(f, c)| f.data_type() == c.data_type()) ); - // SAFETY: `schema` has at least one field. - // This is not required in general, but `schema` is a projection of an `ArraySchema` - // which always has at least one dimension. - assert!(!columns.is_empty()); - // SAFETY: dependent on the correctness of `tile_to_arrow_array` AND the integrity of // the underlying `ResultTile`. 
// Neither of these conditions is a recoverable error from the user perspective - @@ -112,15 +103,24 @@ pub unsafe fn to_record_batch( assert!( columns.iter().all(|c| c.len() as u64 == tile.cell_num()), "Columns do not all have same number of cells: {:?} {:?}", - schema.0.fields(), + schema.fields(), columns.iter().map(|c| c.len()).collect::>() ); // SAFETY: the four asserts above rule out each of the possible error conditions - let arrow = RecordBatch::try_new(Arc::clone(&schema.0), columns) - .expect("Logic error: preconditions for constructing RecordBatch not met"); + let arrow = if columns.is_empty() { + RecordBatch::try_new_with_options( + Arc::clone(schema), + columns, + &RecordBatchOptions::new().with_row_count(Some(tile.cell_num() as usize)), + ) + } else { + RecordBatch::try_new(Arc::clone(schema), columns) + }; - Ok(Box::new(ArrowRecordBatch { arrow })) + let arrow = arrow.expect("Logic error: preconditions for constructing RecordBatch not met"); + + Ok(arrow) } /// Returns an [ArrowArray] which contains the same contents as the provided @@ -139,41 +139,40 @@ unsafe fn tile_to_arrow_array( unsafe { // SAFETY: the caller is responsible that each of the tiles tile out-live // the `Arc` created here. See function docs. - to_arrow_array(f, tile.fixed_tile(), tile.var_tile(), tile.validity_tile()) + to_arrow_array( + f, + tile.fixed_tile().as_slice(), + tile.var_tile().map(|t| t.as_slice()), + tile.validity_tile().map(|t| t.as_slice()), + ) } } /// Returns an [ArrowArray] which contains the same contents as the provided -/// triple of [Tile]s. +/// triple of `&[u8]`s. /// /// If `var.is_some()`, then `fixed` contains the offsets and `var` contains /// the values. Otherwise, `fixed` contains the values. /// -/// The `validity` [Tile] contains one value per cell. +/// The `validity` `&[u8]` contains one value per cell. /// /// # Safety /// /// When possible this function avoids copying data. 
This means that the -/// returned [ArrowArray] may reference data which lives inside the [Tile]s. +/// returned [ArrowArray] may reference data which lives inside the `&[u8]`s. /// This function is safe to call as long as the returned [ArrowArray] is not -/// used after those [Tile]s are destructed. -unsafe fn to_arrow_array( +/// used after the data which the `&[u8]` borrows are destructed. +pub unsafe fn to_arrow_array( f: &Field, - fixed: &Tile, - var: Option<&Tile>, - validity: Option<&Tile>, + fixed: &[u8], + var: Option<&[u8]>, + validity: Option<&[u8]>, ) -> Result, FieldError> { let null_buffer = if let Some(validity) = validity { if !f.is_nullable() { return Err(FieldError::UnexpectedValidityTile); } - Some( - validity - .as_slice() - .iter() - .map(|v| *v != 0) - .collect::(), - ) + Some(validity.iter().map(|v| *v != 0).collect::()) } else if f.is_nullable() { // NB: this is allowed even for nullable fields, it means that none of // the cells is `NULL`. Note that due to schema evolution the arrow @@ -221,6 +220,22 @@ unsafe fn to_arrow_array( null_buffer, ))) } + DataType::LargeUtf8 => { + let Some(var_tile) = var else { + return Err(FieldError::ExpectedVarTile); + }; + let offsets = crate::offsets::try_from_bytes(1, fixed)?; + let values = unsafe { + // SAFETY: the caller is responsible that `fixed` out-lives + // the `Buffer` created here. See function docs. + to_buffer::(var_tile) + }?; + + Ok(Arc::new( + LargeStringArray::try_new(offsets, values.into(), null_buffer) + .map_err(FieldError::InvalidTileData)?, + )) + } DataType::LargeList(value_field) => { let Some(var_tile) = var else { return Err(FieldError::ExpectedVarTile); @@ -238,6 +253,12 @@ unsafe fn to_arrow_array( null_buffer, ))) } + DataType::Null => { + // NB: see `arrow/src/schema.rs`. + // This represents the value type of an attribute with an enumeration + // which we will implement later in CORE-285. 
+ Err(FieldError::EnumerationNotSupported) + } _ => { // SAFETY: ensured by limited range of return values of `crate::schema::arrow_datatype` unreachable!( @@ -257,48 +278,63 @@ unsafe fn to_arrow_array( /// This function is safe to call as long as the returned [PrimitiveArray] /// is not used after the argument [Tile] is destructed. unsafe fn to_primitive_array( - tile: &Tile, + bytes: &[u8], validity: Option, ) -> Result, FieldError> +where + T: ArrowPrimitiveType, +{ + let values = unsafe { + // SAFETY: TODO add comment + to_buffer::(bytes) + }?; + Ok(Arc::new(PrimitiveArray::::new(values, validity)) as Arc) +} + +/// Returns a [Buffer] which refers to the data contained inside the `&[u8]`. +/// +/// # Safety +/// +/// This function is safe to call as long as the returned [Buffer] +/// is not used after the argument [Tile] is destructed. +unsafe fn to_buffer(bytes: &[u8]) -> Result, FieldError> where T: ArrowPrimitiveType, { let (prefix, values, suffix) = { // SAFETY: transmuting u8 to primitive types is safe - unsafe { tile.as_slice().align_to::() } + unsafe { bytes.align_to::() } }; if !(prefix.is_empty() && suffix.is_empty()) { return Err(FieldError::InternalUnalignedValues); } - let tile_buffer = if let Some(ptr) = std::ptr::NonNull::new(values.as_ptr() as *mut u8) { - // SAFETY: - // - // `Buffer::from_custom_allocation` creates a buffer which refers to an existing - // memory region whose ownership is tracked by some `Arc`. - // `Allocation` is basically any type, whose `drop` implementation is responsible - // for freeing the memory. - // - // The tile memory which we reference lives on the `extern "C++"` side of the - // FFI boundary, as such we cannot use `Arc` to track its lifetime. 
- // - // As such: - // 1) we will use an object with trivial `drop` to set up the memory aliasing - // 2) there is an implicit lifetime requirement that the Tile must out-live - // this Buffer, else we shall suffer use after free - // 3) the caller is responsible for upholding that guarantee - unsafe { Buffer::from_custom_allocation(ptr, tile.size() as usize, Arc::new(())) } - } else { - Buffer::from_vec(Vec::::new()) - }; - Ok(Arc::new(PrimitiveArray::::new( - ScalarBuffer::from(tile_buffer), - validity, - )) as Arc) + Ok(ScalarBuffer::::from( + if let Some(ptr) = std::ptr::NonNull::new(values.as_ptr() as *mut u8) { + // SAFETY: + // + // `Buffer::from_custom_allocation` creates a buffer which refers to an existing + // memory region whose ownership is tracked by some `Arc`. + // `Allocation` is basically any type, whose `drop` implementation is responsible + // for freeing the memory. + // + // The tile memory which we reference lives on the `extern "C++"` side of the + // FFI boundary, as such we cannot use `Arc` to track its lifetime. + // + // As such: + // 1) we will use an object with trivial `drop` to set up the memory aliasing + // 2) there is an implicit lifetime requirement that the Tile must out-live + // this Buffer, else we shall suffer use after free + // 3) the caller is responsible for upholding that guarantee + unsafe { Buffer::from_custom_allocation(ptr, bytes.len(), Arc::new(())) } + } else { + Buffer::from_vec(Vec::::new()) + }, + )) } -/// Returns an [OffsetBuffer] which represents the contents of the [Tile]. -fn to_offsets_buffer(value_field: &Field, tile: &Tile) -> Result, OffsetsError> { +/// Returns an [OffsetBuffer] which represents the contents of the `[u8]`. 
+fn to_offsets_buffer(value_field: &Field, bytes: &[u8]) -> Result, OffsetsError> { let Some(value_size) = value_field.data_type().primitive_width() else { // SAFETY: all list types have primitive element // FIXME: this is true for schema fields, not generally true, @@ -308,5 +344,5 @@ fn to_offsets_buffer(value_field: &Field, tile: &Tile) -> Result, Utf8Error), #[error("Error in field '{0}': {1}")] FieldError(String, FieldError), + #[error("Error in enumeration '{0}': {1}")] + EnumerationError(String, crate::enumeration::Error), } /// An error converting an [ArraySchema] [Field] to [ArrowField]. @@ -26,83 +50,174 @@ pub enum FieldError { InvalidCellValNum(CellValNum), #[error("Internal error: invalid discriminant for data type: {0}")] InternalInvalidDatatype(u8), + #[error("Internal error: enumeration not found: {0}")] + InternalEnumerationNotFound(String), + #[error("Enumeration name is not UTF-8")] + EnumerationNameNotUtf8(Vec, Utf8Error), } -/// Wraps a [Schema] for passing across the FFI boundary. -pub struct ArrowSchema(pub Arc); +pub type Enumerations = HashMap>>; -impl Deref for ArrowSchema { - type Target = Arc; - fn deref(&self) -> &Self::Target { - &self.0 - } +/// Wraps a [Schema] for passing across the FFI boundary. +pub struct ArrowArraySchema { + pub schema: Arc, + pub enumerations: Arc, } -pub mod cxx { - use super::*; - - /// Returns a [Schema] which represents the physical field types of - /// the fields from `array_schema` which are contained in `select`. 
- pub fn to_arrow( - array_schema: &ArraySchema, - select: &::cxx::Vector<::cxx::String>, - ) -> Result, Error> { - Ok(Box::new(ArrowSchema(Arc::new(super::to_arrow( - array_schema, - |field: &Field| select.iter().any(|s| s == field.name_cxx()), - )?)))) - } +pub fn to_arrow( + array_schema: &ArraySchema, + which: WhichSchema, +) -> Result<(Schema, Enumerations), Error> { + project_arrow(array_schema, which, |_: &Field| true) } /// Returns a [Schema] which represents the physical field types of the selected fields from `array_schema`. -pub fn to_arrow(array_schema: &ArraySchema, select: F) -> Result +pub fn project_arrow( + array_schema: &ArraySchema, + which: WhichSchema, + select: F, +) -> Result<(Schema, Enumerations), Error> where F: Fn(&Field) -> bool, { - let fields = array_schema.fields().filter(select).map(|f| { - let field_name = f - .name() - .map_err(|e| Error::NameNotUtf8(f.name_cxx().as_bytes().to_vec(), e))?; - let arrow_type = - field_arrow_datatype(&f).map_err(|e| Error::FieldError(field_name.to_owned(), e))?; - - // NB: fields can always be null due to schema evolution - Ok(ArrowField::new(field_name, arrow_type, true)) - }); - - Ok(Schema { - fields: fields.collect::>()?, - metadata: Default::default(), - }) + let fields = array_schema + .fields() + .filter(select) + .map(|f| { + let field_name = f + .name() + .map_err(|e| Error::NameNotUtf8(f.name_cxx().as_bytes().to_vec(), e))?; + let arrow_type = field_arrow_datatype(array_schema, which, &f) + .map_err(|e| Error::FieldError(field_name.to_owned(), e))?; + + // NB: fields can always be null due to schema evolution + let arrow = ArrowField::new(field_name, arrow_type, true); + + if let Some(ename) = f.enumeration_name() { + let ename = ename + .map_err(|e| { + let ename_cxx = { + // SAFETY: it's `Some` to get into the block, it still will be + f.enumeration_name_cxx().unwrap() + }; + FieldError::EnumerationNameNotUtf8(ename_cxx.as_bytes().to_vec(), e) + }) + .map_err(|e| 
Error::FieldError(field_name.to_owned(), e))?; + Ok(arrow.with_metadata(HashMap::from([( + "enumeration".to_owned(), + ename.to_owned(), + )]))) + } else { + Ok(arrow) + } + }) + .collect::>()?; + + let enumerations = fields + .iter() + .filter_map(|f| f.metadata().get("enumeration")) + .unique() + .map(|e| { + let enumeration = array_schema.enumeration(e); + if enumeration.is_null() { + Ok((e.to_owned(), None)) + } else { + let a = unsafe { + // SAFETY: TODO comment + crate::enumeration::array_from_enumeration(&enumeration) + } + .map_err(|err| Error::EnumerationError(e.to_owned(), err))?; + Ok((e.to_owned(), Some(a))) + } + }) + .collect::>()?; + + Ok(( + Schema { + fields, + metadata: Default::default(), + }, + enumerations, + )) } /// Returns an [ArrowDataType] which represents the physical data type of `field`. -pub fn field_arrow_datatype(field: &Field) -> Result { - match field.cell_val_num() { - CellValNum::Single => Ok(arrow_datatype(field.datatype())?), +pub fn field_arrow_datatype( + array_schema: &ArraySchema, + which: WhichSchema, + field: &Field, +) -> Result { + match which { + WhichSchema::Storage => arrow_datatype(field.datatype(), field.cell_val_num()), + WhichSchema::View => { + let Some(e_name) = field.enumeration_name_cxx() else { + return arrow_datatype(field.datatype(), field.cell_val_num()); + }; + if !array_schema.has_enumeration(e_name) { + return Err(FieldError::InternalEnumerationNotFound( + e_name.to_string_lossy().into_owned(), + )); + } + + // NB: This branch is reached from + // `tiledb_query_predicates::Builder::add_predicate` which requires + // a schema in order to parse the text into logical expression. + // However, we may not have the enumeration loaded, and without + // loading it we don't know the type (since the type is co-located + // in storage with the variants). + // We should not need to load all enumerations (potentially expensive) + // in order to parse text. 
+ // We also should not error here because then nothing can be parsed + // if there are *any* enumerations in the schema. + // We can work around this by adding an intermediate step to analyze + // the SQL expression tree. + // We defer the implementation of this workaround, and other questions + // about enumeration evaluation, to CORE-285 + // + // For now we return a type which can only appear in this way, + // to return an error later. + Ok(ArrowDataType::Null) + } + invalid => unreachable!( + "Request for invalid schema type with discriminant {}", + invalid.repr + ), + } +} + +pub fn arrow_datatype( + datatype: Datatype, + cell_val_num: CellValNum, +) -> Result { + match cell_val_num { + CellValNum::Single => Ok(arrow_primitive_datatype(datatype)?), CellValNum::Fixed(nz) => { if let Ok(fixed_length) = i32::try_from(nz.get()) { - let value_type = arrow_datatype(field.datatype())?; + let value_type = arrow_primitive_datatype(datatype)?; Ok(ArrowDataType::FixedSizeList( Arc::new(ArrowField::new_list_field(value_type, false)), fixed_length, )) } else { // cell val num greater than i32::MAX - Err(FieldError::InvalidCellValNum(field.cell_val_num())) + Err(FieldError::InvalidCellValNum(cell_val_num)) } } CellValNum::Var => { - let value_type = arrow_datatype(field.datatype())?; - Ok(ArrowDataType::LargeList(Arc::new( - ArrowField::new_list_field(value_type, false), - ))) + if matches!(datatype, Datatype::STRING_ASCII | Datatype::STRING_UTF8) { + Ok(ArrowDataType::LargeUtf8) + } else { + let value_type = arrow_primitive_datatype(datatype)?; + Ok(ArrowDataType::LargeList(Arc::new( + ArrowField::new_list_field(value_type, false), + ))) + } } } } /// Returns an [ArrowDataType] which represents the physical type of a single value of `datatype`. 
-pub fn arrow_datatype(datatype: Datatype) -> Result { +pub fn arrow_primitive_datatype(datatype: Datatype) -> Result { Ok(match datatype { Datatype::INT8 => ArrowDataType::Int8, Datatype::INT16 => ArrowDataType::Int16, diff --git a/tiledb/oxidize/cxx-interface/build.rs b/tiledb/oxidize/cxx-interface/build.rs index b4c23c432dd..6c30adca629 100644 --- a/tiledb/oxidize/cxx-interface/build.rs +++ b/tiledb/oxidize/cxx-interface/build.rs @@ -2,6 +2,7 @@ fn main() { let bridge_sources = vec![ "src/common/memory_tracker.rs", "src/sm/array_schema/mod.rs", + "src/sm/buffer.rs", "src/sm/enums/mod.rs", "src/sm/misc/mod.rs", "src/sm/query/ast/mod.rs", diff --git a/tiledb/oxidize/cxx-interface/cc/array_schema.cc b/tiledb/oxidize/cxx-interface/cc/array_schema.cc index d397072bfb7..861745eed39 100644 --- a/tiledb/oxidize/cxx-interface/cc/array_schema.cc +++ b/tiledb/oxidize/cxx-interface/cc/array_schema.cc @@ -30,4 +30,38 @@ void set_tile_extent(Dimension& dimension, rust::Slice domain) { } // namespace dimension +namespace enumeration { + +rust::Slice data_cxx(const Enumeration& enumeration) { + std::span span = enumeration.data(); + return rust::Slice(span.data(), span.size()); +} + +rust::Slice offsets_cxx(const Enumeration& enumeration) { + std::span span = enumeration.offsets(); + return rust::Slice(span.data(), span.size()); +} + +} // namespace enumeration + +namespace array_schema { + +std::unique_ptr> enumerations( + const ArraySchema& schema) { + std::unique_ptr> e( + new std::vector(schema.enumeration_map().size())); + + for (const auto& enmr : schema.enumeration_map()) { + if (enmr.second == nullptr) { + e->push_back(MaybeEnumeration::not_loaded(enmr.first)); + } else { + e->push_back(MaybeEnumeration::loaded(enmr.second)); + } + } + + return e; +} + +} // namespace array_schema + } // namespace tiledb::oxidize::sm diff --git a/tiledb/oxidize/cxx-interface/cc/array_schema.h b/tiledb/oxidize/cxx-interface/cc/array_schema.h index 5e3dfd3213d..765438abbe0 100644 --- 
a/tiledb/oxidize/cxx-interface/cc/array_schema.h +++ b/tiledb/oxidize/cxx-interface/cc/array_schema.h @@ -3,6 +3,7 @@ #include "tiledb/sm/array_schema/attribute.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/array_schema/domain.h" +#include "tiledb/sm/array_schema/enumeration.h" namespace tiledb::oxidize::sm { @@ -27,4 +28,47 @@ void set_tile_extent(Dimension& dimension, rust::Slice domain); } // namespace dimension +namespace enumeration { + +using ConstEnumeration = const tiledb::sm::Enumeration; + +rust::Slice data_cxx(const Enumeration& enumeration); + +rust::Slice offsets_cxx(const Enumeration& enumeration); + +} // namespace enumeration + +namespace array_schema { + +struct MaybeEnumeration { + std::optional> name_; + std::shared_ptr value_; + + static MaybeEnumeration not_loaded(const std::string& enumeration_name) { + return MaybeEnumeration{ + .name_ = std::optional(std::cref(enumeration_name)), .value_ = nullptr}; + } + + static MaybeEnumeration loaded(std::shared_ptr value) { + return MaybeEnumeration{.name_ = std::nullopt, .value_ = value}; + } + + const std::string& name() const { + if (name_.has_value()) { + return name_.value().get(); + } else { + return value_->name(); + } + } + + std::shared_ptr get() const { + return value_; + } +}; + +std::unique_ptr> enumerations( + const ArraySchema& schema); + +} // namespace array_schema + } // namespace tiledb::oxidize::sm diff --git a/tiledb/oxidize/cxx-interface/src/sm/array_schema/mod.rs b/tiledb/oxidize/cxx-interface/src/sm/array_schema/mod.rs index 8adb45168c8..9e7d4138fc3 100644 --- a/tiledb/oxidize/cxx-interface/src/sm/array_schema/mod.rs +++ b/tiledb/oxidize/cxx-interface/src/sm/array_schema/mod.rs @@ -79,6 +79,25 @@ mod ffi { #[cxx_name = "type"] fn datatype(&self) -> Datatype; + + #[namespace = "tiledb::oxidize::sm::enumeration"] + fn data_cxx(enumeration: &Enumeration) -> &[u8]; + + #[namespace = "tiledb::oxidize::sm::enumeration"] + fn offsets_cxx(enumeration: &Enumeration) 
-> &[u8]; + } + + #[namespace = "tiledb::oxidize::sm::enumeration"] + unsafe extern "C++" { + type ConstEnumeration; + } + + #[namespace = "tiledb::oxidize::sm::array_schema"] + unsafe extern "C++" { + type MaybeEnumeration; + + fn name(&self) -> &CxxString; + fn get(&self) -> SharedPtr; } #[namespace = "tiledb::sm"] @@ -93,12 +112,18 @@ mod ffi { fn is_attr(&self, name: &CxxString) -> bool; fn is_dim(&self, name: &CxxString) -> bool; + fn has_attribute(&self, name: &CxxString) -> bool; + fn has_enumeration(&self, name: &CxxString) -> bool; + #[cxx_name = "attribute"] fn attribute_by_idx(&self, idx: u32) -> *const Attribute; #[cxx_name = "attribute"] fn attribute_by_name(&self, name: &CxxString) -> *const Attribute; + #[cxx_name = "get_enumeration"] + fn const_enumeration_cxx(&self, name: &CxxString) -> SharedPtr; + #[cxx_name = "cell_val_num"] fn cell_val_num_cxx(&self, name: &CxxString) -> u32; @@ -116,11 +141,15 @@ mod ffi { fn set_cell_order(self: Pin<&mut ArraySchema>, order: Layout); fn set_capacity(self: Pin<&mut ArraySchema>, capacity: u64); fn set_allows_dups(self: Pin<&mut ArraySchema>, allows_dups: bool); + + #[namespace = "tiledb::oxidize::sm::array_schema"] + fn enumerations(schema: &ArraySchema) -> UniquePtr>; } impl SharedPtr {} impl SharedPtr {} impl SharedPtr {} + impl SharedPtr {} impl SharedPtr {} impl UniquePtr {} impl UniquePtr {} @@ -135,7 +164,10 @@ use std::str::Utf8Error; use num_traits::ToBytes; -pub use ffi::{ArraySchema, Attribute, ConstAttribute, Datatype, Dimension, Domain, Enumeration}; +pub use ffi::{ + ArraySchema, Attribute, ConstAttribute, Datatype, Dimension, Domain, Enumeration, + MaybeEnumeration, +}; #[derive(Debug)] pub enum CellValNum { @@ -155,6 +187,10 @@ impl CellValNum { n => Some(Self::Fixed(NonZeroU32::new(n)?)), } } + + pub fn is_var(&self) -> bool { + matches!(self, CellValNum::Var) + } } impl Display for CellValNum { @@ -241,17 +277,19 @@ impl Attribute { CellValNum::from_cxx(cxx).unwrap() } - pub fn 
enumeration_name_cxx(&self) -> *const cxx::CxxString { - ffi::enumeration_name_cxx(self) - } - - pub fn enumeration_name(&self) -> Option> { - let ptr = self.enumeration_name_cxx(); + pub fn enumeration_name_cxx(&self) -> Option<&cxx::CxxString> { + let ptr = ffi::enumeration_name_cxx(self); if ptr.is_null() { return None; } - let cxx = unsafe { &*ptr }; - Some(cxx.to_str()) + Some(unsafe { + // SAFETY: null check above + &*ptr + }) + } + + pub fn enumeration_name(&self) -> Option> { + self.enumeration_name_cxx().map(|s| s.to_str()) } } @@ -306,6 +344,13 @@ impl Field<'_> { } } + pub fn enumeration_name_cxx(&self) -> Option<&cxx::CxxString> { + match self { + Self::Attribute(a) => a.enumeration_name_cxx(), + Self::Dimension(_) => None, + } + } + pub fn enumeration_name(&self) -> Option> { match self { Self::Attribute(a) => a.enumeration_name(), @@ -321,6 +366,24 @@ impl Enumeration { // SAFETY: non-zero would have been validated by the ArraySchema CellValNum::from_cxx(cxx).unwrap() } + + pub fn data(&self) -> &[u8] { + ffi::data_cxx(self) + } + + pub fn offsets(&self) -> Option<&[u64]> { + let b = ffi::offsets_cxx(self); + if b.is_empty() { + None + } else { + let (prefix, offsets, suffix) = unsafe { b.align_to::() }; + + assert!(prefix.is_empty()); + assert!(suffix.is_empty()); + + Some(offsets) + } + } } impl ArraySchema { @@ -367,4 +430,31 @@ impl ArraySchema { .map(Field::Dimension) .chain(self.attributes().map(Field::Attribute)) } + + pub fn enumeration_cxx(&self, name: &cxx::CxxString) -> cxx::SharedPtr { + let e = self.const_enumeration_cxx(name); + assert_eq!( + std::mem::size_of::>(), + std::mem::size_of::>() + ); + unsafe { + // SAFETY: + // 1) SharedPtr has the same representation regardless of generic + // 2) the deleter for `Enumeration` and `const Enumeration` is the same + // 3) the `cxx::SharedPtr` Rust API does not provide a (safe) way to + // get a mutable reference, so this transmutation preserves const-ness + std::mem::transmute::<_, 
cxx::SharedPtr>(e) + } + } + + pub fn enumeration(&self, name: &str) -> cxx::SharedPtr { + cxx::let_cxx_string!(cxxname = name); + self.enumeration_cxx(&cxxname) + } + + /// Returns a list of the enumerations in this schema, each of which + /// may or may not be loaded. + pub fn enumerations(&self) -> cxx::UniquePtr> { + ffi::enumerations(self) + } } diff --git a/tiledb/oxidize/cxx-interface/src/sm/buffer.rs b/tiledb/oxidize/cxx-interface/src/sm/buffer.rs new file mode 100644 index 00000000000..af99bd770d0 --- /dev/null +++ b/tiledb/oxidize/cxx-interface/src/sm/buffer.rs @@ -0,0 +1,29 @@ +#[cxx::bridge] +mod ffi { + #[namespace = "tiledb::sm"] + unsafe extern "C++" { + include!("tiledb/sm/buffer/buffer.h"); + type Buffer; + + fn size(&self) -> u64; + fn offset(&self) -> u64; + + #[cxx_name = "bytes"] + fn as_ptr(&self) -> *const u8; + } +} + +pub use ffi::Buffer; + +impl Buffer { + pub fn as_slice(&self) -> &[u8] { + let ptr = self.as_ptr(); + let ptr = if ptr.is_null() { + assert_eq!(0, self.size()); + std::ptr::NonNull::::dangling().as_ptr() + } else { + ptr + }; + unsafe { std::slice::from_raw_parts(ptr, self.size() as usize) } + } +} diff --git a/tiledb/oxidize/cxx-interface/src/sm/mod.rs b/tiledb/oxidize/cxx-interface/src/sm/mod.rs index 24c91cdf903..b3d86a39d21 100644 --- a/tiledb/oxidize/cxx-interface/src/sm/mod.rs +++ b/tiledb/oxidize/cxx-interface/src/sm/mod.rs @@ -1,4 +1,5 @@ pub mod array_schema; +pub mod buffer; pub mod enums; pub mod misc; pub mod query; diff --git a/tiledb/oxidize/cxx-interface/src/sm/query/ast/mod.rs b/tiledb/oxidize/cxx-interface/src/sm/query/ast/mod.rs index 8c4e5690a9c..6cabc27047a 100644 --- a/tiledb/oxidize/cxx-interface/src/sm/query/ast/mod.rs +++ b/tiledb/oxidize/cxx-interface/src/sm/query/ast/mod.rs @@ -22,6 +22,7 @@ mod ffi { fn get_offsets(&self) -> &ByteVecValue; fn num_children(&self) -> u64; fn get_child(&self, i: u64) -> *const ASTNode; + fn use_enumeration(&self) -> bool; } impl SharedPtr {} diff --git 
a/tiledb/oxidize/expr/Cargo.toml b/tiledb/oxidize/expr/Cargo.toml index b2d27d82f76..18a3f084d5e 100644 --- a/tiledb/oxidize/expr/Cargo.toml +++ b/tiledb/oxidize/expr/Cargo.toml @@ -5,15 +5,5 @@ rust-version = { workspace = true } version = { workspace = true } [dependencies] -anyhow = { workspace = true } -cxx = { workspace = true } datafusion = { workspace = true } -itertools = { workspace = true } -num-traits = { workspace = true } -thiserror = { workspace = true } tiledb-arrow = { workspace = true } -tiledb-cxx-interface = { workspace = true } -tiledb-datatype = { workspace = true } - -[build-dependencies] -cxx-build = { workspace = true } diff --git a/tiledb/oxidize/expr/src/lib.rs b/tiledb/oxidize/expr/src/lib.rs index 3c7995b928a..cfbd5121ce3 100644 --- a/tiledb/oxidize/expr/src/lib.rs +++ b/tiledb/oxidize/expr/src/lib.rs @@ -1,69 +1 @@ -#[cxx::bridge] -mod ffi { - #[namespace = "tiledb::sm"] - extern "C++" { - include!("tiledb/sm/array_schema/array_schema.h"); - include!("tiledb/sm/query/ast/query_ast.h"); - - type ArraySchema = tiledb_cxx_interface::sm::array_schema::ArraySchema; - type ASTNode = tiledb_cxx_interface::sm::query::ast::ASTNode; - type Datatype = tiledb_cxx_interface::sm::enums::Datatype; - } - - extern "C++" { - include!("tiledb/oxidize/arrow.h"); - - #[namespace = "tiledb::oxidize::arrow::record_batch"] - type ArrowRecordBatch = tiledb_arrow::record_batch::ArrowRecordBatch; - - #[namespace = "tiledb::oxidize::arrow::schema"] - type ArrowSchema = tiledb_arrow::schema::ArrowSchema; - } - - #[namespace = "tiledb::oxidize::datafusion::logical_expr"] - extern "Rust" { - type LogicalExpr; - fn to_string(&self) -> String; - - #[cxx_name = "create"] - fn query_condition_to_logical_expr( - schema: &ArraySchema, - query_condition: &ASTNode, - ) -> Result>; - } - - #[namespace = "tiledb::oxidize::datafusion::physical_expr"] - extern "Rust" { - type PhysicalExpr; - fn evaluate(&self, records: &ArrowRecordBatch) -> Result>; - - // TODO: we can avoid the 
double box using the trait object trick, - // see the pdavis 65154 branch - #[cxx_name = "create"] - fn create_physical_expr( - schema: &ArrowSchema, - expr: Box, - ) -> Result>; - } - - #[namespace = "tiledb::oxidize::datafusion::physical_expr"] - extern "Rust" { - type PhysicalExprOutput; - - fn is_scalar(&self) -> bool; - fn is_array(&self) -> bool; - - fn cast_to(&self, datatype: Datatype) -> Result>; - - fn values_u8(&self) -> Result<&[u8]>; - fn values_u64(&self) -> Result<&[u64]>; - } -} - -mod logical_expr; -mod physical_expr; -mod query_condition; - -pub use logical_expr::LogicalExpr; -pub use physical_expr::{PhysicalExpr, PhysicalExprOutput, create_physical_expr}; -pub use query_condition::to_datafusion as query_condition_to_logical_expr; +pub mod logical_expr; diff --git a/tiledb/oxidize/expr/src/logical_expr.rs b/tiledb/oxidize/expr/src/logical_expr.rs index b5c57db0b97..ec9573e1398 100644 --- a/tiledb/oxidize/expr/src/logical_expr.rs +++ b/tiledb/oxidize/expr/src/logical_expr.rs @@ -1,14 +1,35 @@ //! Provides definitions for interacting with DataFusion logical expressions. -use std::fmt::{Display, Formatter, Result as FmtResult}; - +use datafusion::common::DataFusionError; +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion::logical_expr::Expr; -/// Wraps a DataFusion [Expr] for passing across the FFI boundary. -pub struct LogicalExpr(pub Expr); +/// Returns a list of the names of the columns used in this expression. +pub fn columns(expr: &Expr) -> impl Iterator { + expr.column_refs().into_iter().map(|c| c.name.as_ref()) +} + +/// Returns true if `expr` contains aggregate functions and false otherwise. 
+pub fn has_aggregate_functions(expr: &Expr) -> bool { + let rec = expr.visit(&mut AggregateFunctionChecker::default()); + let rec = { + // SAFETY: AggregateFunctionChecker does not return any errors + rec.unwrap() + }; + matches!(rec, TreeNodeRecursion::Stop) +} + +#[derive(Default)] +struct AggregateFunctionChecker {} + +impl TreeNodeVisitor<'_> for AggregateFunctionChecker { + type Node = Expr; -impl Display for LogicalExpr { - fn fmt(&self, f: &mut Formatter) -> FmtResult { - self.0.human_display().fmt(f) + fn f_down(&mut self, node: &Self::Node) -> Result { + if matches!(node, Expr::AggregateFunction(_)) { + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } } } diff --git a/tiledb/oxidize/expr/src/physical_expr.rs b/tiledb/oxidize/expr/src/physical_expr.rs deleted file mode 100644 index 911e99891e2..00000000000 --- a/tiledb/oxidize/expr/src/physical_expr.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Provides definitions for compiling DataFusion logical expressions -//! into DataFusion physical expressions which can be evaluated; -//! and definitions for evaluating those physical expressions. - -use std::ops::Deref; -use std::sync::Arc; - -use datafusion::common::arrow::datatypes::DataType as ArrowDataType; -use datafusion::common::arrow::{array as aa, compute, datatypes as adt}; -use datafusion::common::{DFSchema, DataFusionError, ScalarValue}; -use datafusion::execution::context::ExecutionProps; -use datafusion::logical_expr::ColumnarValue; -use datafusion::physical_plan::PhysicalExpr as DatafusionPhysicalExpr; -use tiledb_arrow::record_batch::ArrowRecordBatch; -use tiledb_arrow::schema::ArrowSchema; -use tiledb_cxx_interface::sm::enums::Datatype; - -use crate::LogicalExpr; - -/// An error using a [PhysicalExpr]. 
-#[derive(Debug, thiserror::Error)] -pub enum PhysicalExprError { - #[error("Compiling expression: {0}")] - Create(#[source] DataFusionError), - #[error("Evaluate expression: {0}")] - Evaluate(#[source] DataFusionError), -} - -/// An error using the output of physical expression evaluation. -#[derive(Debug, thiserror::Error)] -pub enum PhysicalExprOutputError { - #[error("Target type is unavailable: {0}")] - TypeUnavailable(#[source] tiledb_arrow::schema::FieldError), - #[error("Cast expression result: {0}")] - Cast(#[source] DataFusionError), - #[error("Cannot read array as static datatype '{0}': found '{1}'")] - InvalidStaticType(&'static str, ArrowDataType), -} - -/// Wraps a DataFusion [PhysicalExpr] for passing across the FFI boundary. -pub struct PhysicalExpr(Arc); - -impl PhysicalExpr { - pub fn evaluate( - &self, - records: &ArrowRecordBatch, - ) -> Result, PhysicalExprError> { - Ok(Box::new(PhysicalExprOutput( - self.0 - .evaluate(&records.arrow) - .map_err(PhysicalExprError::Evaluate)?, - ))) - } -} - -/// Returns a [PhysicalExpr] which evaluates a [LogicalExpr] for the given `schema`. -pub fn create_physical_expr( - schema: &ArrowSchema, - expr: Box, -) -> Result, PhysicalExprError> { - let dfschema = DFSchema::from_field_specific_qualified_schema( - vec![None; schema.fields.len()], - schema.deref(), - ) - .map_err(PhysicalExprError::Create)?; - let dfexpr = - datafusion::physical_expr::create_physical_expr(&expr.0, &dfschema, &ExecutionProps::new()) - .map_err(PhysicalExprError::Create)?; - Ok(Box::new(PhysicalExpr(dfexpr))) -} - -/// Wraps the output of physical expression evaluation for passing across the FFI boundary. -pub struct PhysicalExprOutput(ColumnarValue); - -impl PhysicalExprOutput { - pub fn is_scalar(&self) -> bool { - matches!(self.0, ColumnarValue::Scalar(_)) - } - - pub fn is_array(&self) -> bool { - matches!(self.0, ColumnarValue::Array(_)) - } - - /// Cast `self` to a target datatype. 
- pub fn cast_to( - &self, - datatype: Datatype, - ) -> Result, PhysicalExprOutputError> { - let arrow_type = tiledb_arrow::schema::arrow_datatype(datatype) - .map_err(PhysicalExprOutputError::TypeUnavailable)?; - let columnar_value = match &self.0 { - ColumnarValue::Scalar(s) => ColumnarValue::Scalar( - s.cast_to(&arrow_type) - .map_err(PhysicalExprOutputError::Cast)?, - ), - ColumnarValue::Array(a) => { - ColumnarValue::Array(compute::kernels::cast::cast(a, &arrow_type).map_err(|e| { - PhysicalExprOutputError::Cast(DataFusionError::ArrowError(e, None)) - })?) - } - }; - Ok(Box::new(PhysicalExprOutput(columnar_value))) - } - - /// Returns the result as a `&[u8]` if it is of that type, - /// and returns `Err` otherwise. - pub fn values_u8(&self) -> Result<&[u8], PhysicalExprOutputError> { - match &self.0 { - ColumnarValue::Scalar(s) => match s { - ScalarValue::UInt8(maybe_byte) => Ok(maybe_byte.as_slice()), - _ => Err(PhysicalExprOutputError::InvalidStaticType( - "u8", - s.data_type().clone(), - )), - }, - ColumnarValue::Array(a) => { - if *a.data_type() == adt::DataType::UInt8 { - // SAFETY: type check right above this - let primitive_array = a.as_any().downcast_ref::().unwrap(); - Ok(primitive_array.values().as_ref()) - } else { - Err(PhysicalExprOutputError::InvalidStaticType( - "u8", - a.data_type().clone(), - )) - } - } - } - } - - /// Returns the result as a `&[u64]` if it is of that type, - /// and returns `Err` otherwise. 
- pub fn values_u64(&self) -> Result<&[u64], PhysicalExprOutputError> { - match &self.0 { - ColumnarValue::Scalar(s) => match s { - ScalarValue::UInt64(maybe_value) => Ok(maybe_value.as_slice()), - _ => Err(PhysicalExprOutputError::InvalidStaticType( - "u64", - s.data_type().clone(), - )), - }, - ColumnarValue::Array(a) => { - if *a.data_type() == adt::DataType::UInt64 { - // SAFETY: type check right above this - let primitive_array = a.as_any().downcast_ref::().unwrap(); - Ok(primitive_array.values().as_ref()) - } else { - Err(PhysicalExprOutputError::InvalidStaticType( - "u64", - a.data_type().clone(), - )) - } - } - } - } -} diff --git a/tiledb/oxidize/query-predicates/Cargo.toml b/tiledb/oxidize/query-predicates/Cargo.toml new file mode 100644 index 00000000000..f085ec6a3dc --- /dev/null +++ b/tiledb/oxidize/query-predicates/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "tiledb-query-predicates" +edition = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[dependencies] +anyhow = { workspace = true } +arrow = { workspace = true } +cxx = { workspace = true } +datafusion = { workspace = true } +itertools = { workspace = true } +num-traits = { workspace = true } +thiserror = { workspace = true } +tiledb-arrow = { workspace = true } +tiledb-cxx-interface = { workspace = true } +tiledb-expr = { workspace = true } + +[build-dependencies] +cxx-build = { workspace = true } diff --git a/tiledb/oxidize/expr/build.rs b/tiledb/oxidize/query-predicates/build.rs similarity index 100% rename from tiledb/oxidize/expr/build.rs rename to tiledb/oxidize/query-predicates/build.rs diff --git a/tiledb/oxidize/query-predicates/src/lib.rs b/tiledb/oxidize/query-predicates/src/lib.rs new file mode 100644 index 00000000000..7623937c9ef --- /dev/null +++ b/tiledb/oxidize/query-predicates/src/lib.rs @@ -0,0 +1,377 @@ +#[cxx::bridge] +mod ffi { + #[namespace = "tiledb::sm"] + extern "C++" { + include!("tiledb/sm/array_schema/array_schema.h"); + 
include!("tiledb/sm/query/readers/result_tile.h"); + + type ArraySchema = tiledb_cxx_interface::sm::array_schema::ArraySchema; + type ResultTile = tiledb_cxx_interface::sm::query::readers::ResultTile; + } + + #[namespace = "tiledb::oxidize"] + extern "Rust" { + type QueryPredicates; + + #[cxx_name = "new_query_predicates"] + fn new_query_predicates_ffi(schema: &ArraySchema) -> Result>; + + fn compile(&mut self) -> Result<()>; + + #[allow(clippy::needless_lifetimes)] // NB: cxx does not seem to handle this lint + unsafe fn field_names<'a>(&'a self) -> Vec<&'a str>; + + #[cxx_name = "add_predicate"] + fn add_text_predicate(&mut self, expr: &str) -> Result<()>; + + fn evaluate_into_bitmap_u8(&self, tile: &ResultTile, bitmap: &mut [u8]) -> Result<()>; + fn evaluate_into_bitmap_u64(&self, tile: &ResultTile, bitmap: &mut [u64]) -> Result<()>; + } +} + +use std::sync::Arc; + +use arrow::datatypes::{DataType, Schema as ArrowSchema}; +use datafusion::common::tree_node::TreeNode; +use datafusion::common::{DFSchema, ScalarValue}; +use datafusion::execution::context::ExecutionProps; +use datafusion::execution::context::SessionContext; +use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::logical_expr::{Expr, ExprSchemable}; +use datafusion::physical_plan::{ColumnarValue, PhysicalExpr}; +use itertools::Itertools; +use num_traits::Zero; +use tiledb_arrow::schema::WhichSchema; +use tiledb_cxx_interface::sm::array_schema::ArraySchema; +use tiledb_cxx_interface::sm::query::readers::ResultTile; + +#[derive(Debug, thiserror::Error)] +pub enum ParseExprError { + #[error("Schema error: {0}")] + Schema(#[from] tiledb_arrow::schema::Error), +} + +#[derive(Debug, thiserror::Error)] +pub enum AddPredicateError { + #[error("Query is in progress")] + InvalidState, + #[error("Parse error: {0}")] + Parse(#[source] datafusion::common::DataFusionError), + #[error("Expression is not a predicate: found return type {0}")] + NotAPredicate(DataType), + #[error("Expression 
contains aggregate functions which are not supported in predicates")] + ContainsAggregateFunctions, + #[error("Type coercion error: {0}")] + TypeCoercion(#[source] datafusion::common::DataFusionError), + #[error("Output type error: {0}")] + OutputType(#[source] datafusion::common::DataFusionError), +} + +#[derive(Debug, thiserror::Error)] +pub enum CompileError { + #[error("Query is in progress")] + InvalidState, + #[error("Expression compile error: {0}")] + PhysicalExpr(#[source] datafusion::common::DataFusionError), +} + +#[derive(Debug, thiserror::Error)] +pub enum EvaluatePredicateError { + #[error("Query has not been started")] + InvalidState, + #[error("Data error: {0}")] + ResultTile(#[from] tiledb_arrow::record_batch::Error), + #[error("Evaluation error: {0}")] + Evaluate(#[source] datafusion::common::DataFusionError), +} + +/// Holds state to parse, analyze and evaluate predicates of a TileDB query. +pub enum QueryPredicates { + /// Predicates are being added to the query. + Build(Builder), + /// The query is being evaluated. + Evaluate(Evaluator), +} + +impl QueryPredicates { + pub fn new(schema: &ArraySchema) -> Result { + Ok(Self::Build(Builder::new(schema, WhichSchema::View)?)) + } + + /// Parses a text predicate into a logical expression and adds it to the list of predicates to + /// evaluate. + /// + /// This is only valid from the `Build` state. + pub fn add_text_predicate(&mut self, expr: &str) -> Result<(), AddPredicateError> { + match self { + Self::Build(builder) => builder.add_text_predicate(expr), + Self::Evaluate(_) => Err(AddPredicateError::InvalidState), + } + } + + /// Transitions state from `Build` to `Evaluate`. + pub fn compile(&mut self) -> Result<(), CompileError> { + match self { + Self::Build(builder) => { + *self = Self::Evaluate(builder.compile()?); + Ok(()) + } + Self::Evaluate(_) => Err(CompileError::InvalidState), + } + } + + /// Returns a list of unique field names which are used in the predicates. 
+ pub fn field_names(&self) -> Vec<&str> { + match self { + Self::Build(builder) => builder.field_names(), + Self::Evaluate(evaluator) => evaluator.field_names(), + } + } + + pub fn evaluate(&self, tile: &ResultTile) -> Result { + match self { + Self::Build(_) => Err(EvaluatePredicateError::InvalidState), + Self::Evaluate(evaluator) => evaluator.evaluate(tile), + } + } + + pub fn evaluate_into_bitmap( + &self, + tile: &ResultTile, + bitmap: &mut [T], + ) -> Result<(), EvaluatePredicateError> + where + T: Copy + Zero, + { + match self { + Self::Build(_) => Err(EvaluatePredicateError::InvalidState), + Self::Evaluate(evaluator) => evaluator.evaluate_into_bitmap(tile, bitmap), + } + } + + fn evaluate_into_bitmap_u8( + &self, + tile: &ResultTile, + bitmap: &mut [u8], + ) -> Result<(), EvaluatePredicateError> { + self.evaluate_into_bitmap::(tile, bitmap) + } + + fn evaluate_into_bitmap_u64( + &self, + tile: &ResultTile, + bitmap: &mut [u64], + ) -> Result<(), EvaluatePredicateError> { + self.evaluate_into_bitmap::(tile, bitmap) + } +} + +/// Structure which accumulates predicates. +pub struct Builder { + /// DataFusion evaluation context. + dfsession: SessionContext, + /// Array schema mapped onto DataFusion data types. + dfschema: DFSchema, + /// Logical syntax tree representations of the predicates. + logical_exprs: Vec, +} + +impl Builder { + pub fn new( + schema: &ArraySchema, + which: WhichSchema, + ) -> Result { + let (arrow_schema, _) = tiledb_arrow::schema::to_arrow(schema, which)?; + let dfschema = { + // SAFETY: this only errors if the names are not unique, + // which they will be because `ArraySchema` requires it + DFSchema::try_from(arrow_schema).unwrap() + }; + + Ok(Builder { + dfsession: SessionContext::from( + SessionStateBuilder::new_with_default_features().build(), + ), + dfschema, + logical_exprs: vec![], + }) + } + + /// Returns a list of all of the field names used in all of the predicates. 
+ pub fn field_names(&self) -> Vec<&str> { + self.logical_exprs + .iter() + .flat_map(tiledb_expr::logical_expr::columns) + .unique() + .collect() + } + + /// Parses a predicate into a logical expression and adds it to the list of predicates to + /// evaluate. + pub fn add_text_predicate(&mut self, expr: &str) -> Result<(), AddPredicateError> { + let parsed_expr = self + .dfsession + .parse_sql_expr(expr, &self.dfschema) + .map_err(AddPredicateError::Parse)?; + + let mut coercion_rewriter = + datafusion::optimizer::analyzer::type_coercion::TypeCoercionRewriter::new( + &self.dfschema, + ); + let logical_expr = parsed_expr + .rewrite(&mut coercion_rewriter) + .map(|t| t.data) + .map_err(AddPredicateError::TypeCoercion)?; + + self.add_predicate(logical_expr) + } + + /// Adds a predicate to the list of predicates to evaluate. + pub fn add_predicate(&mut self, logical_expr: Expr) -> Result<(), AddPredicateError> { + let output_type = logical_expr + .get_type(&self.dfschema) + .map_err(AddPredicateError::OutputType)?; + if output_type != DataType::Boolean && output_type != DataType::Null { + // NB: see non-pub DataFusion API `Filter::is_allowed_filter_type` + return Err(AddPredicateError::NotAPredicate(output_type)); + } else if tiledb_expr::logical_expr::has_aggregate_functions(&logical_expr) { + return Err(AddPredicateError::ContainsAggregateFunctions); + } + self.logical_exprs.push(logical_expr); + + Ok(()) + } + + /// Returns an `Evaluator` which can evaluate the conjunction of all of the predicates. 
+ pub fn compile(&self) -> Result { + let evaluation_schema = { + let projection_fields = self + .field_names() + .iter() + .map(|fname| self.dfschema.as_arrow().field_with_name(fname)) + .process_results(|fs| fs.cloned().collect::>()); + + let projection_fields = { + // SAFETY: all field names have already been validated as part of the schema + projection_fields.unwrap() + }; + + // SAFETY: this only errors if the names are not unique, + // which they will be because `self.field_names()` produces unique field names + DFSchema::try_from(ArrowSchema::new(projection_fields)).unwrap() + }; + let predicate = { + let execution_props = ExecutionProps::new(); + self.logical_exprs + .iter() + .map(|e| { + datafusion::physical_expr::create_physical_expr( + e, + &evaluation_schema, + &execution_props, + ) + .map_err(CompileError::PhysicalExpr) + }) + .process_results(|es| datafusion::physical_expr::conjunction(es))? + }; + Ok(Evaluator { + dfschema: evaluation_schema, + predicate, + }) + } +} + +pub struct Evaluator { + /// Array schema mapped onto DataFusion data types; this is a projection of the full schema + /// consisting only of the fields which are used to evaluate `self.predicate`. + /// The tiles corresponding to fields in this schema will be converted to [RecordBatch] + /// columns, so to avoid extra conversions (which may allocate memory) we do not + /// want to keep all of the fields here. + dfschema: DFSchema, + /// Expression evaluator which evaluates all predicates as a conjunction. + predicate: Arc, +} + +impl Evaluator { + /// Returns a list of all of the field names used in all of the predicates. + pub fn field_names(&self) -> Vec<&str> { + self.dfschema + .fields() + .iter() + .map(|f| f.name().as_ref()) + .collect::>() + } + + pub fn evaluate(&self, tile: &ResultTile) -> Result { + let rb = unsafe { + // SAFETY: "This function is safe to call as long as the returned + // RecordBatch is not used after the ResultTile is destructed." 
+ // The RecordBatch only lives in this stack frame, so we will follow this contract. + tiledb_arrow::record_batch::to_record_batch(self.dfschema.inner(), tile)? + }; + self.predicate + .evaluate(&rb) + .map_err(EvaluatePredicateError::Evaluate) + } + + pub fn evaluate_into_bitmap( + &self, + tile: &ResultTile, + bitmap: &mut [T], + ) -> Result<(), EvaluatePredicateError> + where + T: Copy + Zero, + { + // TODO: consider not evaluating on cells where the bitmap is already set. + // This might happen if there is a historical query condition or if there + // is timestamp duplication. + + let result = self.evaluate(tile)?; + match result { + ColumnarValue::Scalar(s) => match s { + ScalarValue::Boolean(Some(true)) => { + // all cells pass predicates, no need to update bitmap + Ok(()) + } + ScalarValue::Boolean(Some(false)) => { + // no cells pass predicates, clear bitmap + bitmap.fill(T::zero()); + Ok(()) + } + ScalarValue::Null | ScalarValue::Boolean(None) => { + // no cells pass predicates, clear bitmap + bitmap.fill(T::zero()); + Ok(()) + } + _ => { + // should not be reachable due to return type check in `Builder::add_predicate` + unreachable!() + } + }, + ColumnarValue::Array(a) => { + if *a.data_type() == DataType::Boolean { + let bools = arrow::array::as_boolean_array(&a); + for (i, b) in bools.iter().enumerate() { + if !matches!(b, Some(true)) { + bitmap[i] = T::zero(); + } + } + Ok(()) + } else if *a.data_type() == DataType::Null { + // no cells pass predicates, clear bitmap + bitmap.fill(T::zero()); + Ok(()) + } else { + // should not be reachable due to return type check in `Builder::add_predicate` + unreachable!() + } + } + } + } +} + +fn new_query_predicates_ffi( + schema: &ArraySchema, +) -> Result, tiledb_arrow::schema::Error> { + Ok(Box::new(QueryPredicates::new(schema)?)) +} diff --git a/tiledb/oxidize/staticlibs/core-objects/Cargo.toml b/tiledb/oxidize/staticlibs/core-objects/Cargo.toml index 54e72f5d4cc..d52d22db809 100644 --- 
a/tiledb/oxidize/staticlibs/core-objects/Cargo.toml +++ b/tiledb/oxidize/staticlibs/core-objects/Cargo.toml @@ -7,6 +7,7 @@ version = { workspace = true } [dependencies] tiledb-arrow = { workspace = true } tiledb-expr = { workspace = true } +tiledb-query-predicates = { workspace = true } [lib] name = "tiledb_core_objects_rs" diff --git a/tiledb/oxidize/staticlibs/core-objects/src/lib.rs b/tiledb/oxidize/staticlibs/core-objects/src/lib.rs index 85dea7f0663..9f4f772e65c 100644 --- a/tiledb/oxidize/staticlibs/core-objects/src/lib.rs +++ b/tiledb/oxidize/staticlibs/core-objects/src/lib.rs @@ -1,2 +1,3 @@ pub use tiledb_arrow; pub use tiledb_expr; +pub use tiledb_query_predicates; diff --git a/tiledb/oxidize/staticlibs/unit-query-condition/Cargo.toml b/tiledb/oxidize/staticlibs/unit-query-condition/Cargo.toml index b56b9e155c6..27a434af806 100644 --- a/tiledb/oxidize/staticlibs/unit-query-condition/Cargo.toml +++ b/tiledb/oxidize/staticlibs/unit-query-condition/Cargo.toml @@ -14,6 +14,7 @@ tiledb-cxx-interface = { workspace = true } tiledb-expr = { workspace = true } tiledb-pod = { workspace = true } tiledb-common = { workspace = true } +tiledb-query-predicates = { workspace = true } tiledb-test-array-schema = { workspace = true } tiledb-test-cells = { workspace = true } tiledb-test-ffi = { workspace = true } diff --git a/tiledb/oxidize/staticlibs/unit-query-condition/src/lib.rs b/tiledb/oxidize/staticlibs/unit-query-condition/src/lib.rs index b98aaed440c..4539a839b58 100644 --- a/tiledb/oxidize/staticlibs/unit-query-condition/src/lib.rs +++ b/tiledb/oxidize/staticlibs/unit-query-condition/src/lib.rs @@ -1,5 +1,6 @@ pub use tiledb_arrow; pub use tiledb_expr; +pub use tiledb_query_predicates; #[cxx::bridge] mod ffi { @@ -14,6 +15,15 @@ mod ffi { type ResultTile = tiledb_cxx_interface::sm::query::readers::ResultTile; } + #[namespace = "tiledb::test::query_condition_datafusion"] + extern "Rust" { + fn evaluate_as_datafusion( + array_schema: &ArraySchema, + query_condition: 
&ASTNode, + tile: &ResultTile, + ) -> Result>; + } + #[namespace = "tiledb::test::query_condition_datafusion"] unsafe extern "C++" { include!("tiledb/oxidize/staticlibs/unit-query-condition/cc/oxidize.h"); @@ -48,12 +58,38 @@ use arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField, Schema as use arrow::record_batch::RecordBatch; use proptest::prelude::*; use proptest::test_runner::{TestCaseError, TestRunner}; +use tiledb_arrow::schema::WhichSchema; use tiledb_common::query::condition::QueryConditionExpr; use tiledb_common::query::condition::strategy::Parameters as QueryConditionParameters; +use tiledb_cxx_interface::sm::array_schema::ArraySchema; +use tiledb_cxx_interface::sm::query::ast::ASTNode; +use tiledb_cxx_interface::sm::query::readers::ResultTile; use tiledb_pod::array::schema::SchemaData; use tiledb_pod::array::schema::strategy::Requirements as SchemaRequirements; -use tiledb_test_cells::Cells; use tiledb_test_cells::strategy::{CellsParameters, CellsStrategySchema, SchemaWithDomain}; +use tiledb_test_cells::{Cells, FieldData}; + +fn evaluate_as_datafusion( + array_schema: &ArraySchema, + query_condition: &ASTNode, + tile: &ResultTile, +) -> anyhow::Result> { + let logical_expr = tiledb_test_query_condition::logical_expr::to_datafusion( + array_schema, + WhichSchema::Storage, + query_condition, + )?; + + let mut qbuilder = tiledb_query_predicates::Builder::new(array_schema, WhichSchema::Storage)?; + qbuilder.add_predicate(logical_expr)?; + + let qeval = qbuilder.compile()?; + + let mut bitmap = vec![1u8; tile.cell_num() as usize]; + qeval.evaluate_into_bitmap::(tile, &mut bitmap)?; + + Ok(bitmap) +} fn instance_query_condition_datafusion( schema: &SchemaData, @@ -279,6 +315,33 @@ fn examples_query_condition_datafusion_impl() -> anyhow::Result { Ok(true) } +fn cells_ensure_utf8(schema: &SchemaData, cells: Cells) -> Cells { + let mut new_fields = cells.fields().clone(); + for (fname, fdata) in new_fields.iter_mut() { + let Some(field) = 
schema.field(fname.clone()) else { + continue; + }; + + use tiledb_common::array::CellValNum; + use tiledb_common::datatype::Datatype; + + if matches!(field.cell_val_num(), Some(CellValNum::Var)) + && matches!( + field.datatype(), + Datatype::StringAscii | Datatype::StringUtf8 + ) + { + let FieldData::VecUInt8(strs) = fdata else { + continue; + }; + strs.iter_mut().for_each(|s| { + *s = String::from_utf8_lossy(s).into_owned().into_bytes(); + }); + } + } + Cells::new(new_fields) +} + /// Returns a [Strategy] which produces inputs to `instance_query_condition_datafusion`. fn strat_query_condition_datafusion() -> impl Strategy, Rc, Vec)> { @@ -293,23 +356,21 @@ fn strat_query_condition_datafusion() any_with::(schema_params.into()) .prop_flat_map(|schema| { let schema = Rc::new(schema); + let schema_move_into_strat = Rc::clone(&schema); let strat_cells = any_with::(CellsParameters { schema: Some(CellsStrategySchema::WriteSchema(Rc::clone(&schema))), ..Default::default() - }); + }) + .prop_map(move |cells| cells_ensure_utf8(&schema_move_into_strat, cells)); (Just(schema), strat_cells) }) .prop_flat_map(|(schema, cells)| { let cells = Rc::new(cells); - let strat_params = any_with::(QueryConditionParameters { + let strat_qc = any_with::(QueryConditionParameters { domain: Some(Rc::new(SchemaWithDomain::new(Rc::clone(&schema), &cells))), ..Default::default() }); - ( - Just(schema), - Just(cells), - strat_params.prop_map(|qc| vec![qc]), - ) + (Just(schema), Just(cells), strat_qc.prop_map(|qc| vec![qc])) }) } diff --git a/tiledb/oxidize/test-support/query-condition/Cargo.toml b/tiledb/oxidize/test-support/query-condition/Cargo.toml index 2c213c353b0..a20e5b0a040 100644 --- a/tiledb/oxidize/test-support/query-condition/Cargo.toml +++ b/tiledb/oxidize/test-support/query-condition/Cargo.toml @@ -7,6 +7,12 @@ version = { workspace = true } [dependencies] anyhow = { workspace = true } cxx = { workspace = true } +datafusion = { workspace = true } +itertools = { workspace = true } 
+num-traits = { workspace = true } +thiserror = { workspace = true } +tiledb-arrow = { workspace = true } tiledb-common = { workspace = true } tiledb-cxx-interface = { workspace = true } +tiledb-datatype = { workspace = true } tiledb-test-support-cxx-interface = { workspace = true } diff --git a/tiledb/oxidize/test-support/query-condition/src/lib.rs b/tiledb/oxidize/test-support/query-condition/src/lib.rs index b10505b4bb3..8f90cb2445c 100644 --- a/tiledb/oxidize/test-support/query-condition/src/lib.rs +++ b/tiledb/oxidize/test-support/query-condition/src/lib.rs @@ -5,6 +5,7 @@ //! using the strategies we have already written in `tiledb_common`. mod enums; +pub mod logical_expr; use tiledb_common::query::condition::*; use tiledb_cxx_interface::sm::query::ast::ASTNode; diff --git a/tiledb/oxidize/expr/src/query_condition.rs b/tiledb/oxidize/test-support/query-condition/src/logical_expr.rs similarity index 79% rename from tiledb/oxidize/expr/src/query_condition.rs rename to tiledb/oxidize/test-support/query-condition/src/logical_expr.rs index 2806eaa2b20..82ed92b3d1e 100644 --- a/tiledb/oxidize/expr/src/query_condition.rs +++ b/tiledb/oxidize/test-support/query-condition/src/logical_expr.rs @@ -8,22 +8,21 @@ use datafusion::common::arrow::array::{ self as aa, Array as ArrowArray, ArrayData, FixedSizeListArray, GenericListArray, }; use datafusion::common::arrow::buffer::OffsetBuffer; -use datafusion::common::arrow::datatypes::Field as ArrowField; +use datafusion::common::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; use datafusion::common::{Column, ScalarValue}; use datafusion::logical_expr::expr::InList; use datafusion::logical_expr::{BinaryExpr, Expr, Operator}; use itertools::Itertools; use num_traits::FromBytes; use tiledb_arrow::offsets::Error as OffsetsError; +use tiledb_arrow::schema::WhichSchema; use tiledb_cxx_interface::sm::array_schema::{ArraySchema, CellValNum, Field}; use tiledb_cxx_interface::sm::enums::{Datatype, 
QueryConditionCombinationOp, QueryConditionOp}; use tiledb_cxx_interface::sm::misc::ByteVecValue; use tiledb_cxx_interface::sm::query::ast::ASTNode; use tiledb_datatype::apply_physical_type; -use crate::logical_expr::LogicalExpr; - -/// An error constructing a [LogicalExpr] for a query condition. +/// An error constructing an [Expr] for a query condition. #[derive(Debug, thiserror::Error)] pub enum Error { #[error("Query condition expression internal error: {0}")] @@ -68,6 +67,8 @@ pub enum UserError { InListCellValNumMismatch(CellValNum, usize), #[error("Variable-length data offsets: ")] InListVarOffsets(#[from] OffsetsError), + #[error("Invalid query condition operand: {0}")] + ExpectedUtf8(#[source] std::string::FromUtf8Error), } /// Returns an iterator over the values of type [T] contained in `bytes`. @@ -104,6 +105,7 @@ where fn leaf_ast_to_binary_expr( schema: &ArraySchema, + which: WhichSchema, ast: &ASTNode, op: Operator, ) -> Result { @@ -113,7 +115,13 @@ fn leaf_ast_to_binary_expr( ); }; - fn apply(field: &Field, ast: &ASTNode, operator: Operator) -> Result + fn apply( + schema: &ArraySchema, + which: WhichSchema, + field: &Field, + ast: &ASTNode, + operator: Operator, + ) -> Result where T: FromBytes, ::Bytes: for<'a> TryFrom<&'a [u8]>, @@ -127,9 +135,10 @@ fn leaf_ast_to_binary_expr( .map(ScalarValue::from) .peekable(); - let expect_datatype = tiledb_arrow::schema::field_arrow_datatype(field).map_err(|e| { - InternalError::SchemaField(field.name_cxx().to_string_lossy().into_owned(), e) - })?; + let expect_datatype = tiledb_arrow::schema::field_arrow_datatype(schema, which, field) + .map_err(|e| { + InternalError::SchemaField(field.name_cxx().to_string_lossy().into_owned(), e) + })?; let right = match field.cell_val_num() { CellValNum::Single => { @@ -168,30 +177,38 @@ fn leaf_ast_to_binary_expr( ) } CellValNum::Var => { - let values = if values.peek().is_none() { - aa::make_array(ArrayData::new_empty(&expect_datatype)) + if matches!(expect_datatype, 
ArrowDataType::LargeUtf8) { + ScalarValue::LargeUtf8(Some( + String::from_utf8(ast.get_data().as_slice().to_vec()) + .map_err(UserError::ExpectedUtf8)?, + )) } else { - // SAFETY: `values` produces a static type, so all will match. - // `values` is also non-empty per `peek`. - ScalarValue::iter_to_array(values).unwrap() - }; - let element_field = ArrowField::new_list_field(values.data_type().clone(), false); - ScalarValue::LargeList( - GenericListArray::::new( - element_field.into(), - OffsetBuffer::::from_lengths(std::iter::once(values.len())), - values, - None, + let values = if values.peek().is_none() { + aa::make_array(ArrayData::new_empty(&expect_datatype)) + } else { + // SAFETY: `values` produces a static type, so all will match. + // `values` is also non-empty per `peek`. + ScalarValue::iter_to_array(values).unwrap() + }; + let element_field = + ArrowField::new_list_field(values.data_type().clone(), false); + ScalarValue::LargeList( + GenericListArray::::new( + element_field.into(), + OffsetBuffer::::from_lengths(std::iter::once(values.len())), + values, + None, + ) + .into(), ) - .into(), - ) + } } }; Ok(Expr::BinaryExpr(BinaryExpr { left: Box::new(column), op: operator, - right: Box::new(Expr::Literal(right)), + right: Box::new(Expr::Literal(right, None)), })) } @@ -199,7 +216,7 @@ fn leaf_ast_to_binary_expr( apply_physical_type!( value_type, NativeType, - apply::(&field, ast, op), + apply::(schema, which, &field, ast, op), |invalid: Datatype| Err(InternalError::InvalidDatatype(invalid.repr.into()).into()) ) } @@ -226,7 +243,7 @@ fn leaf_ast_to_in_list(schema: &ArraySchema, ast: &ASTNode, negated: bool) -> Re let in_list = match field.cell_val_num() { CellValNum::Single => scalars .map(ScalarValue::from) - .map(Expr::Literal) + .map(|s| Expr::Literal(s, None)) .collect::>(), CellValNum::Fixed(nz) => { let fixed_size = nz.get() as usize; @@ -265,8 +282,7 @@ fn leaf_ast_to_in_list(schema: &ArraySchema, ast: &ASTNode, negated: bool) -> Re None, )) }) - 
.map(ScalarValue::FixedSizeList) - .map(Expr::Literal) + .map(|s| Expr::Literal(ScalarValue::FixedSizeList(s), None)) .collect::>() } CellValNum::Var => { @@ -303,8 +319,7 @@ fn leaf_ast_to_in_list(schema: &ArraySchema, ast: &ASTNode, negated: bool) -> Re None, )) }) - .map(ScalarValue::LargeList) - .map(Expr::Literal) + .map(|s| Expr::Literal(ScalarValue::LargeList(s), None)) .collect::>() } }; @@ -316,13 +331,44 @@ fn leaf_ast_to_in_list(schema: &ArraySchema, ast: &ASTNode, negated: bool) -> Re })) } - let value_type = field.datatype(); - apply_physical_type!( - value_type, - NativeType, - apply::(&field, ast, negated), - |invalid: Datatype| Err(InternalError::InvalidDatatype(invalid.repr.into()).into()) - ) + if matches!( + field.datatype(), + Datatype::STRING_ASCII | Datatype::STRING_UTF8 + ) && field.cell_val_num().is_var() + { + let array_offsets = tiledb_arrow::offsets::try_from_bytes_and_num_values( + field.datatype().value_size(), + ast.get_offsets().as_slice(), + ast.get_data().len(), + ) + .map_err(UserError::from)?; + + let column = Expr::Column(Column::from_name( + field.name().map_err(UserError::FieldNameNotUtf8)?, + )); + let in_list = array_offsets + .windows(2) + .map(|w| { + let elts = ast.get_data().as_slice()[w[0] as usize..w[1] as usize].to_vec(); + String::from_utf8(elts).map_err(UserError::ExpectedUtf8) + }) + .map_ok(|s| Expr::Literal(ScalarValue::LargeUtf8(Some(s)), None)) + .collect::, _>>()?; + + Ok(Expr::InList(InList { + expr: Box::new(column), + list: in_list, + negated, + })) + } else { + let value_type = field.datatype(); + apply_physical_type!( + value_type, + NativeType, + apply::(&field, ast, negated), + |invalid: Datatype| Err(InternalError::InvalidDatatype(invalid.repr.into()).into()) + ) + } } fn leaf_ast_to_null_test(schema: &ArraySchema, ast: &ASTNode) -> Result { @@ -344,16 +390,18 @@ fn leaf_ast_to_null_test(schema: &ArraySchema, ast: &ASTNode) -> Result Ok(Expr::Literal(ScalarValue::Boolean(Some(false)))), + 
QueryConditionOp::ALWAYS_FALSE => { + Ok(Expr::Literal(ScalarValue::Boolean(Some(false)), None)) + } QueryConditionOp::LT | QueryConditionOp::LE | QueryConditionOp::GT | QueryConditionOp::GE => { // TODO: are these invalid? - Ok(Expr::Literal(ScalarValue::Boolean(Some(false)))) + Ok(Expr::Literal(ScalarValue::Boolean(Some(false)), None)) } invalid => Err(InternalError::InvalidOp(invalid.repr.into()).into()), } @@ -361,12 +409,13 @@ fn leaf_ast_to_null_test(schema: &ArraySchema, ast: &ASTNode) -> Result Result { let mut level = query_condition .children() - .map(|ast| to_datafusion_impl(schema, ast)) + .map(|ast| to_datafusion(schema, which, ast)) .collect::, _>>()?; while level.len() != 1 { @@ -396,21 +445,25 @@ fn combination_ast_to_binary_expr( Ok(level.into_iter().next().unwrap()) } -fn to_datafusion_impl(schema: &ArraySchema, query_condition: &ASTNode) -> Result { +pub fn to_datafusion( + schema: &ArraySchema, + which: WhichSchema, + query_condition: &ASTNode, +) -> Result { if query_condition.is_expr() { match *query_condition.get_combination_op() { QueryConditionCombinationOp::AND => { - combination_ast_to_binary_expr(schema, query_condition, Operator::And) + combination_ast_to_binary_expr(schema, which, query_condition, Operator::And) } QueryConditionCombinationOp::OR => { - combination_ast_to_binary_expr(schema, query_condition, Operator::Or) + combination_ast_to_binary_expr(schema, which, query_condition, Operator::Or) } QueryConditionCombinationOp::NOT => { let children = query_condition.children().collect::>(); if children.len() != 1 { return Err(InternalError::NotTree(children.len()).into()); } - let negate_arg = to_datafusion_impl(schema, children[0])?; + let negate_arg = to_datafusion(schema, which, children[0])?; Ok(!negate_arg) } invalid => Err(InternalError::InvalidCombinationOp(invalid.repr.into()).into()), @@ -421,31 +474,37 @@ fn to_datafusion_impl(schema: &ArraySchema, query_condition: &ASTNode) -> Result match *query_condition.get_op() { 
QueryConditionOp::LT => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::Lt, )?), QueryConditionOp::LE => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::LtEq, )?), QueryConditionOp::GT => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::Gt, )?), QueryConditionOp::GE => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::GtEq, )?), QueryConditionOp::EQ => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::Eq, )?), QueryConditionOp::NE => Ok(leaf_ast_to_binary_expr( schema, + which, query_condition, Operator::NotEq, )?), @@ -470,23 +529,13 @@ fn to_datafusion_impl(schema: &ArraySchema, query_condition: &ASTNode) -> Result // which we must replicate here Ok(Expr::IsNotNull(Box::new(column))) } else { - Ok(Expr::Literal(ScalarValue::Boolean(Some(true)))) + Ok(Expr::Literal(ScalarValue::Boolean(Some(true)), None)) } } - QueryConditionOp::ALWAYS_FALSE => Ok(Expr::Literal(ScalarValue::Boolean(Some(false)))), + QueryConditionOp::ALWAYS_FALSE => { + Ok(Expr::Literal(ScalarValue::Boolean(Some(false)), None)) + } invalid => Err(InternalError::InvalidOp(invalid.repr.into()).into()), } } } - -/// Returns a [LogicalExpr] which represents the same expression -/// as the requested query condition. 
-pub fn to_datafusion( - schema: &ArraySchema, - query_condition: &ASTNode, -) -> Result, Error> { - Ok(Box::new(LogicalExpr(to_datafusion_impl( - schema, - query_condition, - )?))) -} diff --git a/tiledb/oxidize/test-support/result-tile/src/lib.rs b/tiledb/oxidize/test-support/result-tile/src/lib.rs index e90529b0b7c..ed81a7b8991 100644 --- a/tiledb/oxidize/test-support/result-tile/src/lib.rs +++ b/tiledb/oxidize/test-support/result-tile/src/lib.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use std::ops::Deref; use std::sync::Arc; -use arrow::array::{Array as ArrowArray, GenericListArray, PrimitiveArray}; +use arrow::array::{Array as ArrowArray, GenericListArray, NativeAdapter, PrimitiveArray}; use arrow::buffer::OffsetBuffer; use arrow::datatypes::{Field as ArrowField, Schema as ArrowSchema}; use arrow::record_batch::RecordBatch; @@ -17,6 +17,34 @@ use tiledb_cxx_interface::sm::array_schema::{ArraySchema, CellValNum}; use tiledb_cxx_interface::sm::query::readers::ResultTile; use tiledb_test_cells::{Cells, FieldData, typed_field_data_go}; +/// Associates a native type with an `ArrowDataType` value which has the +/// same corresponding native type +pub trait TypeTraits { + type ArrowPrimitiveType; +} + +macro_rules! type_traits { + ($ty:ty, $primitive_type:ident) => { + impl TypeTraits for $ty { + /// Associated `arrow` data type for constructing a `PrimitiveArray`. + // NB: we don't really care about logical type here, if we did we will need a different + // solution. + type ArrowPrimitiveType = arrow::datatypes::$primitive_type; + } + }; +} + +type_traits!(i8, Int8Type); +type_traits!(i16, Int16Type); +type_traits!(i32, Int32Type); +type_traits!(i64, Int64Type); +type_traits!(u8, UInt8Type); +type_traits!(u16, UInt16Type); +type_traits!(u32, UInt32Type); +type_traits!(u64, UInt64Type); +type_traits!(f32, Float32Type); +type_traits!(f64, Float64Type); + /// Packages a `ResultTile` with the buffers which contain the tile data. 
pub struct PackagedResultTile { /// Buffers underlying the [ResultTile]. @@ -214,14 +242,21 @@ fn cells_to_record_batch(cells: &Cells) -> RecordBatch { fn field_data_to_array(field: &FieldData) -> Arc { typed_field_data_go!( field, - _DT, + DT, cells, - Arc::new(cells.iter().copied().collect::>()) as Arc, + Arc::new( + cells + .iter() + .copied() + .map(NativeAdapter::<
::ArrowPrimitiveType>::from) + .collect::>() + ) as Arc, { let values = cells .iter() .flatten() .copied() + .map(NativeAdapter::<
::ArrowPrimitiveType>::from) .collect::>(); let offsets = OffsetBuffer::::from_lengths(cells.iter().map(|s| s.len())); let cells = GenericListArray::new( diff --git a/tiledb/sm/buffer/buffer.h b/tiledb/sm/buffer/buffer.h index 27048094b2d..cf1c7eb3b25 100644 --- a/tiledb/sm/buffer/buffer.h +++ b/tiledb/sm/buffer/buffer.h @@ -93,6 +93,12 @@ class BufferBase { return static_cast(data_); } + /** Returns the buffer data as bytes (this declaration is seemingly redundant + * but helps with Rust FFI declarations) */ + const uint8_t* bytes() const { + return data_as(); + } + /** * Reads from the local data into the input buffer. * diff --git a/tiledb/sm/c_api/tiledb.cc b/tiledb/sm/c_api/tiledb.cc index d2e9de42ec2..86e82a3f047 100644 --- a/tiledb/sm/c_api/tiledb.cc +++ b/tiledb/sm/c_api/tiledb.cc @@ -584,6 +584,22 @@ int32_t tiledb_query_set_condition( return TILEDB_OK; } +capi_return_t tiledb_query_add_predicate( + tiledb_ctx_t* const ctx, + tiledb_query_t* const query, + const char* const predicate) { + // Sanity check + if (sanity_check(ctx, query) == TILEDB_ERR) { + return TILEDB_ERR; + } else if (predicate == nullptr) { + throw CAPIStatusException("Argument \"predicate\" may not be NULL"); + } + + throw_if_not_ok(query->query_->add_predicate(predicate)); + + return TILEDB_OK; +} + int32_t tiledb_query_finalize(tiledb_ctx_t* ctx, tiledb_query_t* query) { // Trivial case if (query == nullptr) @@ -2775,6 +2791,15 @@ CAPI_INTERFACE( return api_entry(ctx, query, cond); } +CAPI_INTERFACE( + query_add_predicate, + tiledb_ctx_t* const ctx, + tiledb_query_t* const query, + const char* const predicate) { + return api_entry( + ctx, query, predicate); +} + CAPI_INTERFACE(query_finalize, tiledb_ctx_t* ctx, tiledb_query_t* query) { return api_entry(ctx, query); } diff --git a/tiledb/sm/c_api/tiledb_experimental.h b/tiledb/sm/c_api/tiledb_experimental.h index 73c3321168b..8cf0c12167b 100644 --- a/tiledb/sm/c_api/tiledb_experimental.h +++ b/tiledb/sm/c_api/tiledb_experimental.h @@ 
-453,6 +453,34 @@ TILEDB_EXPORT int32_t tiledb_query_condition_set_use_enumeration( const tiledb_query_condition_t* cond, int use_enumeration) TILEDB_NOEXCEPT; +/* ********************************* */ +/* QUERY PREDICATE */ +/* ********************************* */ + +/** + * Adds a predicate to be applied to a read query. The added predicate + * will be analyzed and evaluated in the subarray step, query condition + * step, or both. + * + * The predicate is parsed as an Apache DataFusion SQL expression and must + * evaluate to a boolean. + * + * **Example:** + * + * @code{.c} + * const char* pred = "(row BETWEEN 1 AND 10) OR (column BETWEEN 1 AND 10)"; + * tiledb_query_add_predicate(ctx, query, pred); + * @endcode + * + * @param ctx The TileDB context. + * @param query The TileDB query. + * @param predicate A text representation of the desired predicate. + */ +TILEDB_EXPORT capi_return_t tiledb_query_add_predicate( + tiledb_ctx_t* ctx, + tiledb_query_t* query, + const char* predicate) TILEDB_NOEXCEPT; + /* ********************************* */ /* QUERY STATUS DETAILS */ /* ********************************* */ diff --git a/tiledb/sm/config/config.cc b/tiledb/sm/config/config.cc index 167e7d62609..6681ee24881 100644 --- a/tiledb/sm/config/config.cc +++ b/tiledb/sm/config/config.cc @@ -123,7 +123,6 @@ const std::string Config::SM_QUERY_SPARSE_GLOBAL_ORDER_PREPROCESS_TILE_MERGE = "32768"; const std::string Config::SM_QUERY_SPARSE_UNORDERED_WITH_DUPS_READER = "refactored"; -const std::string Config::SM_QUERY_CONDITION_EVALUATOR = "ast"; const std::string Config::SM_MEM_MALLOC_TRIM = "true"; const std::string Config::SM_UPPER_MEMORY_LIMIT = "1073741824"; // 1GB const std::string Config::SM_MEM_TOTAL_BUDGET = "10737418240"; // 10GB @@ -325,8 +324,6 @@ const std::map default_config_values = { std::make_pair( "sm.query.sparse_unordered_with_dups.reader", Config::SM_QUERY_SPARSE_UNORDERED_WITH_DUPS_READER), - std::make_pair( - "sm.query.condition_evaluator", 
Config::SM_QUERY_CONDITION_EVALUATOR), std::make_pair("sm.mem.malloc_trim", Config::SM_MEM_MALLOC_TRIM), std::make_pair( "sm.mem.tile_upper_memory_limit", Config::SM_UPPER_MEMORY_LIMIT), diff --git a/tiledb/sm/config/config.h b/tiledb/sm/config/config.h index 4893839d673..fb87debbc68 100644 --- a/tiledb/sm/config/config.h +++ b/tiledb/sm/config/config.h @@ -253,9 +253,6 @@ class Config { /** Which reader to use for sparse unordered with dups queries. */ static const std::string SM_QUERY_SPARSE_UNORDERED_WITH_DUPS_READER; - /** How to evaluate query conditions */ - static const std::string SM_QUERY_CONDITION_EVALUATOR; - /** Should malloc_trim be called on query/ctx destructors. */ static const std::string SM_MEM_MALLOC_TRIM; diff --git a/tiledb/sm/cpp_api/enumeration_experimental.h b/tiledb/sm/cpp_api/enumeration_experimental.h index a9fe7474be5..79ed0250732 100644 --- a/tiledb/sm/cpp_api/enumeration_experimental.h +++ b/tiledb/sm/cpp_api/enumeration_experimental.h @@ -396,7 +396,7 @@ class Enumeration { static Enumeration create( const Context& ctx, const std::string& name, - std::vector& values, + const std::vector& values, bool ordered = false, std::optional type = std::nullopt) { using DataT = impl::TypeHandler; @@ -449,7 +449,7 @@ class Enumeration { static Enumeration create( const Context& ctx, const std::string& name, - std::vector>& values, + const std::vector>& values, bool ordered = false, std::optional type = std::nullopt) { using DataT = impl::TypeHandler; diff --git a/tiledb/sm/cpp_api/query_experimental.h b/tiledb/sm/cpp_api/query_experimental.h index 1cf8f052c28..9ed9ba8f0cd 100644 --- a/tiledb/sm/cpp_api/query_experimental.h +++ b/tiledb/sm/cpp_api/query_experimental.h @@ -68,6 +68,24 @@ class QueryExperimental { update_value_size)); } + /** + * Adds a predicate to be applied to a read query. The added predicate + * will be analyzed and evaluated in the subarray step, query condition + * step, or both. 
+ * + * The predicate is parsed as an Apache DataFusion SQL expression and must + * evaluate to a boolean. + * + * @param ctx The TileDB context. + * @param query The TileDB query. + * @param predicate A text representation of the desired predicate. + */ + static void add_predicate( + const Context& ctx, Query& query, const std::string& predicate) { + ctx.handle_error(tiledb_query_add_predicate( + ctx.ptr().get(), query.ptr().get(), predicate.c_str())); + } + /** * Get the number of relevant fragments from the subarray. Should only be * called after size estimation was asked for. diff --git a/tiledb/sm/query/legacy/reader.cc b/tiledb/sm/query/legacy/reader.cc index 36dbfb111a1..cd624b37422 100644 --- a/tiledb/sm/query/legacy/reader.cc +++ b/tiledb/sm/query/legacy/reader.cc @@ -246,9 +246,15 @@ Status Reader::dowork() { auto timer_se = stats_->start_timer("dowork"); // Check that the query condition is valid. - if (condition_.has_value()) { - RETURN_NOT_OK(condition_->check(array_schema_)); + if (predicates_.condition_.has_value()) { + RETURN_NOT_OK(predicates_.condition_->check(array_schema_)); } +#ifdef HAVE_RUST + if (predicates_.datafusion_.has_value()) { + return logger_->status(Status_ReaderError( + "tiledb_query_add_predicate is not supported for this query")); + } +#endif if (buffers_.count(constants::delete_timestamps) != 0) { return logger_->status( @@ -357,8 +363,8 @@ Status Reader::load_initial_data() { RETURN_CANCEL_OR_ERROR(generate_timestamped_conditions()); // Make a list of dim/attr that will be loaded for query condition. 
- if (condition_.has_value()) { - qc_loaded_attr_names_set_.merge(condition_->field_names()); + if (predicates_.condition_.has_value()) { + qc_loaded_attr_names_set_.merge(predicates_.condition_->field_names()); } for (auto delete_and_update_condition : delete_and_update_conditions_) { qc_loaded_attr_names_set_.merge(delete_and_update_condition.field_names()); @@ -382,7 +388,8 @@ Status Reader::apply_query_condition( std::vector& result_tiles, Subarray& subarray, uint64_t stride) { - if ((!condition_.has_value() && delete_and_update_conditions_.empty()) || + if ((!predicates_.has_predicates() && + delete_and_update_conditions_.empty()) || result_cell_slabs.empty()) { return Status::Ok(); } @@ -407,8 +414,8 @@ Status Reader::apply_query_condition( stride = 1; QueryCondition::Params params(query_memory_tracker_, array_schema_); - if (condition_.has_value()) { - RETURN_NOT_OK(condition_->apply( + if (predicates_.condition_.has_value()) { + RETURN_NOT_OK(predicates_.condition_->apply( params, fragment_metadata_, result_cell_slabs, stride)); } @@ -2243,7 +2250,7 @@ tuple> Reader::fill_dense_coords( // Query conditions mutate the result cell slabs to filter attributes. // This path does not use result cell slabs, which will fill coordinates // for cells that should be filtered out. - if (condition_.has_value()) { + if (predicates_.has_predicates()) { return { logger_->status(Status_ReaderError( "Cannot read dense coordinates; dense coordinate " diff --git a/tiledb/sm/query/query.cc b/tiledb/sm/query/query.cc index 751511714ab..8cb446cd929 100644 --- a/tiledb/sm/query/query.cc +++ b/tiledb/sm/query/query.cc @@ -59,6 +59,10 @@ #include "tiledb/sm/storage_manager/storage_manager.h" #include "tiledb/sm/tile/writer_tile_tuple.h" +#ifdef HAVE_RUST +#include "tiledb/oxidize/query_predicates.h" +#endif + #include #include #include @@ -688,7 +692,7 @@ void Query::init() { // Create dimension label queries and remove labels from subarray. 
if (uses_dimension_labels()) { - if (condition_.has_value()) { + if (predicates_.condition_.has_value()) { throw QueryException( "Cannot init query; Using query conditions and dimension labels " "together is not supported."); @@ -725,6 +729,12 @@ void Query::init() { fragment_name_)); } +#ifdef HAVE_RUST + if (predicates_.datafusion_.has_value()) { + predicates_.datafusion_.value()->compile(); + } +#endif + // Create the query strategy if querying main array and the Subarray does // not need to be updated. if (!only_dim_label_query() && !subarray_.has_label_ranges()) { @@ -762,7 +772,7 @@ const std::optional& Query::condition() const { "queries"); } - return condition_; + return predicates_.condition_; } const std::vector& Query::update_values() const { @@ -816,8 +826,8 @@ Status Query::process() { } } - if (condition_.has_value()) { - auto& names = condition_->enumeration_field_names(); + if (predicates_.condition_.has_value()) { + auto& names = predicates_.condition_->enumeration_field_names(); std::unordered_set deduped_enmr_names; for (auto name : names) { auto attr = array_schema_->attribute(name); @@ -841,29 +851,7 @@ Status Query::process() { return Status::Ok(); })); - condition_->rewrite_for_schema(array_schema()); - - // experimental feature - maybe evaluate using datafusion - const std::string evaluator_param_name = "sm.query.condition_evaluator"; - const auto evaluator = config_.get(evaluator_param_name); - if (evaluator == "datafusion") { -#ifdef HAVE_RUST - auto timer_se = - stats_->start_timer("query_condition_rewrite_to_datafusion"); - condition_->rewrite_to_datafusion(array_schema()); -#else - std::stringstream ss; - ss << "Invalid value for parameter '" << evaluator_param_name - << "': 'datafusion' requires build configuration '-DTILEDB_RUST=ON'"; - throw QueryException(ss.str()); -#endif - } else if (evaluator.has_value() && evaluator != "ast") { - std::stringstream ss; - ss << "Invalid value for parameter '" << evaluator_param_name - << "': 
found '" << evaluator.value() - << "', expected 'datafusion' or 'ast'"; - throw QueryException(ss.str()); - } + predicates_.condition_->rewrite_for_schema(array_schema()); } if (type_ == QueryType::READ) { @@ -1490,9 +1478,45 @@ Status Query::set_condition(const QueryCondition& condition) { throw std::invalid_argument("Query conditions must not be empty"); } - condition_ = condition; + predicates_.condition_ = condition; + + return Status::Ok(); +} + +Status Query::add_predicate([[maybe_unused]] const char* predicate) { + if (type_ != QueryType::READ) { + return Status_QueryError( + "Cannot add query predicate; Operation only " + "applicable to read queries"); + } + if (status_ != tiledb::sm::QueryStatus::UNINITIALIZED) { + return Status_QueryError( + "Cannot add query predicate; Adding a predicate to an already " + "initialized query is not supported."); + } +#ifndef HAVE_RUST + return Status_QueryError( + "Cannot add query predicate: feature requires build " + "configuration '-DTILEDB_RUST=ON'"); +#else + if (!predicates_.datafusion_.has_value()) { + try { + predicates_.datafusion_.emplace( + tiledb::oxidize::new_query_predicates(array_schema())); + } catch (const rust::Error& e) { + return Status_QueryError( + "Cannot add predicate: Schema error: " + std::string(e.what())); + } + } + try { + predicates_.datafusion_.value()->add_predicate(predicate); + } catch (const rust::Error& e) { + return Status_QueryError( + "Error adding predicate: " + std::string(e.what())); + } return Status::Ok(); +#endif } Status Query::add_update_value( @@ -1667,7 +1691,8 @@ Status Query::submit() { throw_if_not_ok(create_strategy()); // Allocate remote buffer storage for global order writes if necessary. - // If we cache an entire write a query may be uninitialized for N submits. + // If we cache an entire write a query may be uninitialized for N + // submits. 
if (!query_remote_buffer_storage_.has_value() && type_ == QueryType::WRITE && layout_ == Layout::GLOBAL_ORDER) { query_remote_buffer_storage_.emplace(*this, buffers_); @@ -1801,8 +1826,8 @@ bool Query::is_aggregate(std::string output_field_name) const { /* ****************************** */ Layout Query::effective_layout() const { - // If the user has not set a layout, it will default to row-major, which will - // use the legacy reader on sparse arrays, and fail if aggregates were + // If the user has not set a layout, it will default to row-major, which + // will use the legacy reader on sparse arrays, and fail if aggregates were // specified. However, if only aggregates are specified and no regular data // buffers, the layout doesn't matter and we can transparently switch to the // much more efficient unordered layout. @@ -1828,7 +1853,7 @@ Status Query::create_strategy(bool skip_checks_serialization) { aggregate_buffers_, subarray_, layout, - condition_, + predicates_, default_channel_aggregates_, skip_checks_serialization); if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) { @@ -1886,8 +1911,8 @@ Status Query::create_strategy(bool skip_checks_serialization) { all_dense &= frag_md->dense(); } - // We are going to deprecate dense arrays with sparse fragments in 2.27 but - // log a warning for now. + // We are going to deprecate dense arrays with sparse fragments in 2.27 + // but log a warning for now. if (array_schema_->dense() && !all_dense) { LOG_WARN( "This dense array contains sparse fragments. 
Support for reading " diff --git a/tiledb/sm/query/query.h b/tiledb/sm/query/query.h index c33d32bd559..5ce5c9ee3c2 100644 --- a/tiledb/sm/query/query.h +++ b/tiledb/sm/query/query.h @@ -58,8 +58,24 @@ #include "tiledb/sm/storage_manager/cancellation_source.h" #include "tiledb/sm/subarray/subarray.h" +#ifdef HAVE_RUST +#include "tiledb/oxidize/rust.h" +#endif + using namespace tiledb::common; +namespace tiledb::oxidize::datafusion { + +namespace logical_expr { +struct LogicalExpr; +} + +namespace session { +struct Session; +} + +} // namespace tiledb::oxidize::datafusion + namespace tiledb::sm { class Array; @@ -646,6 +662,14 @@ class Query { */ Status set_condition(const QueryCondition& condition); + /** + * Adds a predicate for filtering results in a read query. + * + * @param predicate A string representation of the desired predicate. + * @return Status + */ + Status add_predicate(const char* predicate); + /** * Adds an update value for an update query. * @@ -1021,8 +1045,8 @@ class Query { /** Stores information about the written fragments. */ std::vector written_fragment_info_; - /** The query condition. */ - std::optional condition_; + /** Query predicates. */ + QueryPredicates predicates_; /** The update values. 
*/ std::vector update_values_; diff --git a/tiledb/sm/query/query_condition.cc b/tiledb/sm/query/query_condition.cc index cd0be2bf5d5..169f7bd15aa 100644 --- a/tiledb/sm/query/query_condition.cc +++ b/tiledb/sm/query/query_condition.cc @@ -42,8 +42,7 @@ #include "tiledb/sm/query/readers/result_cell_slab.h" #ifdef HAVE_RUST -#include "tiledb/oxidize/arrow.h" -#include "tiledb/oxidize/expr.h" +#include "tiledb/oxidize/query_predicates.h" #endif #include @@ -167,30 +166,6 @@ void QueryCondition::rewrite_for_schema(const ArraySchema& array_schema) { tree_->rewrite_for_schema(array_schema); } -#ifdef HAVE_RUST -bool QueryCondition::rewrite_to_datafusion(const ArraySchema& array_schema) { - if (!datafusion_.has_value()) { - std::vector select(field_names().begin(), field_names().end()); - - try { - auto logical_expr = tiledb::oxidize::datafusion::logical_expr::create( - array_schema, *tree_.get()); - auto dfschema = - tiledb::oxidize::arrow::schema::create(array_schema, select); - auto physical_expr = tiledb::oxidize::datafusion::physical_expr::create( - *dfschema, std::move(logical_expr)); - - datafusion_.emplace(std::move(dfschema), std::move(physical_expr)); - } catch (const ::rust::Error& e) { - throw std::logic_error( - "Unexpected error compiling expression: " + std::string(e.what())); - } - return true; - } - return false; -} -#endif - Status QueryCondition::check(const ArraySchema& array_schema) const { if (!tree_) { return Status::Ok(); @@ -2919,26 +2894,8 @@ Status QueryCondition::apply_sparse( const QueryCondition::Params& params, const ResultTile& result_tile, std::span result_bitmap) { -#ifdef HAVE_RUST - if (datafusion_.has_value()) { - try { - datafusion_.value().apply(params, result_tile, result_bitmap); - } catch (const ::rust::Error& e) { - throw std::logic_error( - "Unexpected error evaluating expression: " + std::string(e.what())); - } - } else { - apply_tree_sparse( - tree_, - params, - result_tile, - std::multiplies(), - result_bitmap); - } -#else 
apply_tree_sparse( tree_, params, result_tile, std::multiplies(), result_bitmap); -#endif return Status::Ok(); } @@ -2959,63 +2916,27 @@ uint64_t QueryCondition::condition_index() const { return condition_index_; } -#ifdef HAVE_RUST -template -void QueryCondition::Datafusion::apply( - const QueryCondition::Params&, - const ResultTile& result_tile, - std::span result_bitmap) const { - const auto arrow = - tiledb::oxidize::arrow::record_batch::create(*schema_, result_tile); - const auto predicate_eval = expr_->evaluate(*arrow); - static_assert( - std::is_same_v || - std::is_same_v); - if constexpr (std::is_same_v) { - const auto predicate_out_u8 = predicate_eval->cast_to(Datatype::UINT8); - const auto bitmap = predicate_out_u8->values_u8(); - if (predicate_out_u8->is_scalar() && bitmap.empty()) { - // all NULLs - for (auto& result : result_bitmap) { - result = 0; - } - } else if (predicate_out_u8->is_scalar()) { - // all the same value - for (auto& result : result_bitmap) { - result = result * bitmap[0]; - } - } else if (bitmap.size() == result_bitmap.size()) { - for (uint64_t i = 0; i < bitmap.size(); i++) { - result_bitmap[i] *= bitmap[i]; - } - } else { - throw std::logic_error( - "Expression evaluation bitmap has unexpected size"); - } +std::unordered_set QueryPredicates::field_names() const { +#ifndef HAVE_RUST + if (condition_.has_value()) { + return condition_.value().field_names(); } else { - const auto predicate_out_u64 = predicate_eval->cast_to(Datatype::UINT64); - const auto bitmap = predicate_out_u64->values_u64(); - if (predicate_out_u64->is_scalar() && bitmap.empty()) { - // all NULLs - for (auto& result : result_bitmap) { - result = 0; - } - } else if (predicate_out_u64->is_scalar()) { - // all the same value - for (auto& result : result_bitmap) { - result = result * bitmap[0]; - } - } else if (bitmap.size() == result_bitmap.size()) { - for (uint64_t i = 0; i < result_bitmap.size(); i++) { - result_bitmap[i] *= bitmap[i]; - } - } else { - throw 
std::logic_error( - "Expression evaluation bitmap has unexpected size"); + return {}; + } +#else + std::unordered_set ret; + if (condition_.has_value()) { + ret = condition_.value().field_names(); + } + if (datafusion_.has_value()) { + const auto dffields = datafusion_.value()->field_names(); + for (const auto& rstring : dffields) { + ret.insert(std::string(rstring.begin(), rstring.end())); } } -} + return ret; #endif +} // Explicit template instantiations. template Status QueryCondition::apply_sparse( diff --git a/tiledb/sm/query/query_condition.h b/tiledb/sm/query/query_condition.h index 0d9e827c4eb..189dcc3f619 100644 --- a/tiledb/sm/query/query_condition.h +++ b/tiledb/sm/query/query_condition.h @@ -47,11 +47,8 @@ using namespace tiledb::common; -namespace tiledb::oxidize::arrow::schema { -struct ArrowSchema; -} -namespace tiledb::oxidize::datafusion::physical_expr { -struct PhysicalExpr; +namespace tiledb::oxidize { +struct QueryPredicates; } namespace tiledb { @@ -197,21 +194,6 @@ class QueryCondition { */ void rewrite_for_schema(const ArraySchema& array_schema); -#ifdef HAVE_RUST - /** - * If desired and possible, rewrite the query condition to use Datafusion to - * evaluate. - * - * Note that this is basically for testing, this isn't expected to be a - * production feature - we will have other entry points for Datafusion which - * make more sense. Datafusion evaluation appears to be slightly slower, which - * makes some sense since we must create arrow and datafusion data structures. - * - * @return true if a rewrite occurred, false otherwise - */ - bool rewrite_to_datafusion(const ArraySchema& array_schema); -#endif - /** * Verifies that the current state contains supported comparison * operations. Currently, we support the following: @@ -402,29 +384,6 @@ class QueryCondition { /** AST Tree structure representing the condition. 
**/ tdb_unique_ptr tree_{}; -#ifdef HAVE_RUST - /** Datafusion expression evaluation */ - struct Datafusion { - using BoxSchema = ::rust::Box; - using BoxExpr = - ::rust::Box; - BoxSchema schema_; - BoxExpr expr_; - - Datafusion(BoxSchema&& schema, BoxExpr&& expr) - : schema_(std::move(schema)) - , expr_(std::move(expr)) { - } - - template - void apply( - const QueryCondition::Params& params, - const ResultTile& result_tile, - std::span result_bitmap) const; - }; - std::optional datafusion_; -#endif - /** Caches all field names in the value nodes of the AST. */ mutable std::unordered_set field_names_; @@ -750,6 +709,41 @@ class QueryCondition { std::span result_bitmap) const; }; +/** + * Bundles the different kinds of predicates which can be attached to a query. + */ +struct QueryPredicates { + std::optional condition_; + +#ifdef HAVE_RUST + /** + * Query predicates. + * + * History lesson: + * QueryCondition was added first and provides a C API to construct expression + * trees. QueryPredicates was added later and uses DataFusion to parse text + * predicates and provide much broader evaluation capabilities. + */ + std::optional> datafusion_; +#endif + + /** + * @return true if there are any predicates to apply + */ + bool has_predicates() const { +#ifndef HAVE_RUST + return condition_.has_value(); +#else + return condition_.has_value() || datafusion_.has_value(); +#endif + } + + /** + * @return a set of all unique field names used in the predicates + */ + std::unordered_set field_names() const; +}; + } // namespace sm } // namespace tiledb diff --git a/tiledb/sm/query/readers/dense_reader.cc b/tiledb/sm/query/readers/dense_reader.cc index 1c3023b6edb..1d40c16df7e 100644 --- a/tiledb/sm/query/readers/dense_reader.cc +++ b/tiledb/sm/query/readers/dense_reader.cc @@ -152,9 +152,15 @@ Status DenseReader::dowork() { auto timer_se = stats_->start_timer("dowork"); // Check that the query condition is valid. 
- if (condition_.has_value()) { - RETURN_NOT_OK(condition_->check(array_schema_)); + if (predicates_.condition_.has_value()) { + RETURN_NOT_OK(predicates_.condition_->check(array_schema_)); } +#ifdef HAVE_RUST + if (predicates_.datafusion_.has_value()) { + throw DenseReaderException( + "tiledb_query_add_predicate is not supported for dense array queries"); + } +#endif get_dim_attr_stats(); @@ -308,8 +314,8 @@ Status DenseReader::dense_read() { } // Compute attribute names to load and copy. - if (condition_.has_value()) { - qc_loaded_attr_names_set_ = condition_->field_names(); + if (predicates_.has_predicates()) { + qc_loaded_attr_names_set_ = predicates_.field_names(); } qc_loaded_attr_names_.clear(); qc_loaded_attr_names_.reserve(qc_loaded_attr_names_set_.size()); @@ -352,7 +358,7 @@ Status DenseReader::dense_read() { uint64_t subarray_start_cell = 0; uint64_t subarray_end_cell = 0; std::vector qc_result( - !condition_.has_value() ? 0 : subarray.cell_num(), 1); + !predicates_.has_predicates() ? 0 : subarray.cell_num(), 1); // Keep track of the current var buffer sizes. std::map var_buffer_sizes; @@ -629,7 +635,7 @@ void DenseReader::init_read_state() { qc_coords_mode_ = config_.get("sm.query.dense.qc_coords_mode", Config::must_find); - if (qc_coords_mode_ && !condition_.has_value()) { + if (qc_coords_mode_ && !predicates_.condition_.has_value()) { throw DenseReaderException( "sm.query.dense.qc_coords_mode requires a query condition"); } @@ -1035,7 +1041,7 @@ Status DenseReader::apply_query_condition( auto timer_se = stats_->start_timer("apply_query_condition"); auto& result_space_tiles = iteration_tile_data->result_space_tiles(); - if (condition_.has_value()) { + if (predicates_.has_predicates()) { // Compute the result of the query condition. 
std::vector qc_names; qc_names.reserve(condition_names.size()); @@ -1147,7 +1153,7 @@ Status DenseReader::apply_query_condition( *(fragment_metadata_[frag_domains[i].fid()] ->array_schema() .get())); - throw_if_not_ok(condition_->apply_dense( + throw_if_not_ok(predicates_.condition_->apply_dense( params, result_space_tile.result_tile(frag_domains[i].fid()), start, @@ -1723,7 +1729,8 @@ Status DenseReader::copy_fixed_tiles( } // Apply query condition results to this slab. - if (condition_.has_value() && result_space_tile.qc_filtered_results()) { + if (predicates_.has_predicates() && + result_space_tile.qc_filtered_results()) { for (uint64_t c = 0; c < iter.cell_slab_length(); c++) { if (!(qc_result[c + cell_offset] & 0x1)) { memcpy( @@ -1901,7 +1908,8 @@ Status DenseReader::copy_offset_tiles( } } - if (condition_.has_value() && result_space_tile.qc_filtered_results()) { + if (predicates_.has_predicates() && + result_space_tile.qc_filtered_results()) { // Apply query condition results to this slab. 
for (uint64_t c = 0; c < iter.cell_slab_length(); c++) { if (!(qc_result[c + cell_offset] & 0x1)) { @@ -2076,7 +2084,7 @@ Status DenseReader::aggregate_tiles( } std::vector aggregate_bitmap(iter.cell_slab_length(), 1); - if (condition_.has_value()) { + if (predicates_.has_predicates()) { memcpy( aggregate_bitmap.data(), qc_result.data() + cell_offset, diff --git a/tiledb/sm/query/readers/ordered_dim_label_reader.cc b/tiledb/sm/query/readers/ordered_dim_label_reader.cc index f35a424e8d8..4b791572a4a 100644 --- a/tiledb/sm/query/readers/ordered_dim_label_reader.cc +++ b/tiledb/sm/query/readers/ordered_dim_label_reader.cc @@ -120,7 +120,7 @@ OrderedDimLabelReader::OrderedDimLabelReader( "Cannot initialize ordered dim label reader; Subarray is set"); } - if (condition_.has_value()) { + if (predicates_.has_predicates()) { throw OrderedDimLabelReaderException( "Ordered dimension label reader cannot process query condition"); } diff --git a/tiledb/sm/query/readers/reader_base.cc b/tiledb/sm/query/readers/reader_base.cc index 7cdb1230d5e..2d8f6e0b68e 100644 --- a/tiledb/sm/query/readers/reader_base.cc +++ b/tiledb/sm/query/readers/reader_base.cc @@ -76,7 +76,7 @@ ReaderBase::ReaderBase( stats::Stats* stats, shared_ptr logger, StrategyParams& params) : StrategyBase(stats, logger, params) , memory_tracker_(params.query_memory_tracker()) - , condition_(params.condition()) + , predicates_(params.predicates()) , user_requested_timestamps_(false) , deletes_consolidation_no_purge_( buffers_.count(constants::delete_timestamps) != 0) diff --git a/tiledb/sm/query/readers/reader_base.h b/tiledb/sm/query/readers/reader_base.h index 0242dc4c11b..b5d70e95717 100644 --- a/tiledb/sm/query/readers/reader_base.h +++ b/tiledb/sm/query/readers/reader_base.h @@ -245,8 +245,8 @@ class ReaderBase : public StrategyBase { /** The query's memory tracker. */ shared_ptr memory_tracker_; - /** The query condition. 
*/ - std::optional& condition_; + /** User predicates */ + QueryPredicates& predicates_; /** * The delete and update conditions. diff --git a/tiledb/sm/query/readers/sparse_global_order_reader.cc b/tiledb/sm/query/readers/sparse_global_order_reader.cc index 521102423e0..0997ca14ffc 100644 --- a/tiledb/sm/query/readers/sparse_global_order_reader.cc +++ b/tiledb/sm/query/readers/sparse_global_order_reader.cc @@ -262,8 +262,8 @@ Status SparseGlobalOrderReader::dowork() { stats_->add_counter("loop_num", 1); // Check that the query condition is valid. - if (condition_.has_value()) { - throw_if_not_ok(condition_->check(array_schema_)); + if (predicates_.condition_.has_value()) { + throw_if_not_ok(predicates_.condition_->check(array_schema_)); } get_dim_attr_stats(); diff --git a/tiledb/sm/query/readers/sparse_index_reader_base.cc b/tiledb/sm/query/readers/sparse_index_reader_base.cc index 6a626b3503b..e12dff5c856 100644 --- a/tiledb/sm/query/readers/sparse_index_reader_base.cc +++ b/tiledb/sm/query/readers/sparse_index_reader_base.cc @@ -50,6 +50,10 @@ #include +#ifdef HAVE_RUST +#include "tiledb/oxidize/query_predicates.h" +#endif + namespace tiledb::sm { class SparseIndexReaderBaseException : public StatusException { @@ -141,7 +145,7 @@ uint64_t SparseIndexReaderBase::available_memory() { bool SparseIndexReaderBase::has_post_deduplication_conditions( FragmentMetadata& frag_meta) { - return frag_meta.has_delete_meta() || condition_.has_value() || + return frag_meta.has_delete_meta() || predicates_.has_predicates() || (!delete_and_update_conditions_.empty() && !deletes_consolidation_no_purge_); } @@ -248,8 +252,8 @@ Status SparseIndexReaderBase::load_initial_data() { } // Make a list of dim/attr that will be loaded for query condition. 
- if (condition_.has_value()) { - for (auto& name : condition_->field_names()) { + if (predicates_.has_predicates()) { + for (auto& name : predicates_.field_names()) { if (!array_schema_.is_dim(name) || !include_coords_) { qc_loaded_attr_names_set_.insert(name); } @@ -610,7 +614,7 @@ void SparseIndexReaderBase::apply_query_condition( std::vector& result_tiles) { auto timer_se = stats_->start_timer("apply_query_condition"); - if (condition_.has_value() || !delete_and_update_conditions_.empty() || + if (predicates_.has_predicates() || !delete_and_update_conditions_.empty() || use_timestamps_) { // Process all tiles in parallel. throw_if_not_ok(parallel_for( @@ -656,16 +660,41 @@ void SparseIndexReaderBase::apply_query_condition( } // Compute the result of the query condition for this tile. - if (condition_.has_value()) { + if (predicates_.condition_.has_value()) { QueryCondition::Params params( query_memory_tracker_, *(frag_meta->array_schema().get())); - throw_if_not_ok(condition_->apply_sparse( + throw_if_not_ok(predicates_.condition_->apply_sparse( params, *rt, rt->post_dedup_bitmap())); if (array_schema_.allows_dups()) { rt->count_cells(); } } +#ifdef HAVE_RUST + if (predicates_.datafusion_.has_value()) { + rust::Slice bitmap( + rt->post_dedup_bitmap().data(), rt->post_dedup_bitmap().size()); + static_assert( + std::is_same_v || + std::is_same_v); + try { + if constexpr (std::is_same_v) { + predicates_.datafusion_.value()->evaluate_into_bitmap_u8( + *rt, bitmap); + } else { + predicates_.datafusion_.value()->evaluate_into_bitmap_u64( + *rt, bitmap); + } + } catch (const rust::Error& e) { + throw SparseIndexReaderBaseException( + "Error evaluating expression: " + std::string(e.what())); + } + if (array_schema_.allows_dups()) { + rt->count_cells(); + } + } +#endif + // Apply delete conditions. if (!delete_and_update_conditions_.empty()) { // Allocate delete condition idx vector if required. 
This vector diff --git a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc index 153ba11a2f7..41a6d6e9035 100644 --- a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc +++ b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc @@ -121,8 +121,8 @@ Status SparseUnorderedWithDupsReader::dowork() { } // Check that the query condition is valid. - if (condition_.has_value()) { - throw_if_not_ok(condition_->check(array_schema_)); + if (predicates_.condition_.has_value()) { + throw_if_not_ok(predicates_.condition_->check(array_schema_)); } get_dim_attr_stats(); diff --git a/tiledb/sm/query/strategy_base.h b/tiledb/sm/query/strategy_base.h index 9265af7d1ee..a28a0d2cee2 100644 --- a/tiledb/sm/query/strategy_base.h +++ b/tiledb/sm/query/strategy_base.h @@ -37,6 +37,7 @@ #include "tiledb/common/status.h" #include "tiledb/sm/array_schema/dimension.h" #include "tiledb/sm/misc/types.h" +#include "tiledb/sm/query/query_condition.h" #include "tiledb/sm/storage_manager/cancellation_source.h" #include "tiledb/sm/storage_manager/context_resources.h" @@ -50,7 +51,6 @@ class LocalQueryStateMachine; class MemoryTracker; class Subarray; class QueryBuffer; -class QueryCondition; using DefaultChannelAggregates = std::unordered_map>; @@ -78,7 +78,7 @@ class StrategyParams { std::unordered_map& aggregate_buffers, Subarray& subarray, Layout layout, - std::optional& condition, + QueryPredicates& predicates, DefaultChannelAggregates& default_channel_aggregates, bool skip_checks_serialization) : resources_(resources) @@ -93,7 +93,7 @@ class StrategyParams { , aggregate_buffers_(aggregate_buffers) , subarray_(subarray) , layout_(layout) - , condition_(condition) + , predicates_(predicates) , default_channel_aggregates_(default_channel_aggregates) , skip_checks_serialization_(skip_checks_serialization) { } @@ -163,7 +163,11 @@ class StrategyParams { /** Return the condition. 
*/ inline std::optional& condition() { - return condition_; + return predicates_.condition_; + } + + inline QueryPredicates& predicates() { + return predicates_; } /** Return the default channel aggregates. */ @@ -220,8 +224,8 @@ class StrategyParams { /** Layout of the cells in the result of the subarray. */ Layout layout_; - /** Query condition. */ - std::optional& condition_; + /** Query predicates. */ + QueryPredicates& predicates_; /** Default channel aggregates. */ DefaultChannelAggregates& default_channel_aggregates_; diff --git a/tiledb/sm/query/test/unit_query_condition.cc b/tiledb/sm/query/test/unit_query_condition.cc index bfc2acf66f3..ff48913acf0 100644 --- a/tiledb/sm/query/test/unit_query_condition.cc +++ b/tiledb/sm/query/test/unit_query_condition.cc @@ -50,6 +50,7 @@ #ifdef HAVE_RUST #include "test/support/assert_helpers.h" +#include "tiledb/oxidize/arrow.h" #include "tiledb/oxidize/unit_query_condition.h" #endif @@ -5187,32 +5188,22 @@ std::vector instance( const tiledb::sm::ASTNode& ast) { using Asserter = tiledb::test::AsserterRapidcheck; - // set up traditional TileDB evaluation + // evaluate using traditional TileDB evaluation QueryCondition qc_ast(ast.clone()); qc_ast.rewrite_for_schema(array_schema); - - // set up datafusion evaluation - QueryCondition qc_datafusion(ast.clone()); - qc_datafusion.rewrite_for_schema(array_schema); - const bool datafusion_ok = qc_datafusion.rewrite_to_datafusion(array_schema); - ASSERTER(datafusion_ok); - - // prepare to evaluate QueryCondition::Params params( tiledb::test::get_test_memory_tracker(), array_schema); std::vector bitmap_ast(tile.cell_num(), 1); - std::vector bitmap_datafusion(tile.cell_num(), 1); - - // evaluate traditional ast const auto status_ast = qc_ast.apply_sparse(params, tile, bitmap_ast); ASSERTER(status_ast.ok()); - // evaluate datafusion - const auto status_datafusion = - qc_datafusion.apply_sparse(params, tile, bitmap_datafusion); - ASSERTER(status_datafusion.ok()); + // evaluate using 
datafusion + const auto rs_bitmap_datafusion = + evaluate_as_datafusion(array_schema, *qc_ast.ast().get(), tile); + std::vector bitmap_datafusion( + rs_bitmap_datafusion.begin(), rs_bitmap_datafusion.end()); // compare ASSERTER(bitmap_ast == bitmap_datafusion); @@ -5346,7 +5337,7 @@ TEST_CASE("QueryCondition: Apache DataFusion evaluation", "[QueryCondition]") { tile.tile_tuple("v")->fixed_tile().write( offsets_v.data(), 0, offsets_v.size() * sizeof(uint64_t)); - tile.tile_tuple("v")->var_tile().write(&values_v[0], 0, sizeof(values_v)); + tile.tile_tuple("v")->var_tile().write(&values_v[0], 0, values_v.size()); tile.tile_tuple("v")->validity_tile().write( validity_v.data(), 0, validity_v.size() * sizeof(uint8_t)); diff --git a/tiledb/sm/rest/rest_client.cc b/tiledb/sm/rest/rest_client.cc index a9339a15871..4dfc7b5b97e 100644 --- a/tiledb/sm/rest/rest_client.cc +++ b/tiledb/sm/rest/rest_client.cc @@ -104,7 +104,7 @@ std::shared_ptr RestClientFactory::make( Logger& logger, shared_ptr&& tracker) { if (factory_override_ == nullptr) { - return tdb::make_shared(HERE(), RestClient(config)); + return tdb::make_shared(HERE(), config); } else { return factory_override_( parent_stats, config, compute_tp, logger, std::move(tracker));