Skip to content

Commit b8bc74a

Browse files
Add experimental C API for ls_recursive. (#4615)
This adds experimental C API for ls_recursive, which is currently only supported over S3. --- TYPE: FEATURE DESC: Add experimental C API for ls_recursive. --------- Co-authored-by: eric-hughes-tiledb <[email protected]>
1 parent 4e7b978 commit b8bc74a

File tree

10 files changed

+450
-7
lines changed

10 files changed

+450
-7
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ list(APPEND TILEDB_C_API_RELATIVE_HEADERS
352352
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/string/string_api_external.h"
353353
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/vfs/vfs_api_enum.h"
354354
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/vfs/vfs_api_external.h"
355+
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/vfs/vfs_api_experimental.h"
355356
)
356357
set(TILEDB_C_API_RELATIVE_HEADER_BASE "${CMAKE_CURRENT_SOURCE_DIR}")
357358

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ set(TILEDB_UNIT_TEST_SOURCES
203203
src/unit-SubarrayPartitioner-sparse.cc
204204
src/unit-vfs.cc
205205
src/unit-win-filesystem.cc
206+
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/vfs/test/unit_capi_ls_recursive.cc"
206207
)
207208

208209
if (TILEDB_CPP_API)
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/**
2+
* @file tiledb/api/c_api/vfs/test/unit_capi_ls_recursive.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2023 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* Tests the C API for tiledb_vfs_ls_recursive.
31+
*
32+
* TODO: This test is built and run as part of tiledb_unit. Once we're able to
33+
* execute these tests in CI, we should build this test as a separate unit.
34+
*/
35+
36+
#include <test/support/tdb_catch.h>
37+
#include "test/support/src/vfs_helpers.h"
38+
39+
using namespace tiledb::test;
40+
41+
/**
 * Runs tiledb_vfs_ls_recursive over the S3Test fixture and compares the
 * entries gathered by the callback against the fixture's expected results.
 *
 * Catch2 re-runs the test body once per leaf SECTION, so both the
 * collect-everything callback and the early-stop callback are exercised.
 * The test returns early when the fixture reports S3 as unsupported.
 */
TEST_CASE("C API: ls_recursive callback", "[vfs][ls-recursive]") {
  using tiledb::sm::LsObjects;
  S3Test s3_test({10, 50});
  if (!s3_test.is_supported()) {
    return;
  }
  auto expected = s3_test.expected_results();

  vfs_config vfs_config;
  // REQUIRE rather than CHECK: continuing after a failed allocation would pass
  // an unusable handle to every call below.
  tiledb_ctx_t* ctx = nullptr;
  REQUIRE(tiledb_ctx_alloc(vfs_config.config, &ctx) == TILEDB_OK);
  tiledb_vfs_t* vfs = nullptr;
  REQUIRE(tiledb_vfs_alloc(ctx, vfs_config.config, &vfs) == TILEDB_OK);

  LsObjects data;
  // Default callback: record every visited object and keep traversing.
  tiledb_ls_callback_t cb = [](const char* path,
                               size_t path_len,
                               uint64_t object_size,
                               void* data) -> int32_t {
    auto* ls_data = static_cast<LsObjects*>(data);
    ls_data->push_back({{path, path_len}, object_size});
    return 1;
  };

  // With only one SECTION in a test case Catch2 always enters it, which would
  // leave the default callback above untested. This empty leaf gives the
  // full-traversal path its own run against the unmodified expected results.
  SECTION("callback collects all results") {
  }

  // This callback will return 0 exactly once. Traversal should stop immediately
  // and not continue to the next object.
  SECTION("callback stops traversal") {
    cb = [](const char* path,
            size_t path_len,
            uint64_t object_size,
            void* data) -> int32_t {
      // There's no precheck here to push_back, so the vector size will match
      // the number of times the callback was executed.
      auto* ls_data = static_cast<LsObjects*>(data);
      ls_data->push_back({{path, path_len}, object_size});
      // Stop traversal after we collect 10 results.
      return ls_data->size() != 10;
    };
    expected.resize(10);
  }

  CHECK(
      tiledb_vfs_ls_recursive(ctx, vfs, s3_test.temp_dir_.c_str(), cb, &data) ==
      TILEDB_OK);
  CHECK(data.size() == expected.size());
  CHECK(data == expected);

  // Release the C API handles allocated above; CHECK does not abort the test,
  // so this point is reached even when an expectation fails.
  tiledb_vfs_free(&vfs);
  tiledb_ctx_free(&ctx);
}
88+
89+
/**
 * Verifies that an exception thrown from the user callback makes
 * tiledb_vfs_ls_recursive return TILEDB_ERR and leaves the result container
 * untouched.
 *
 * The test returns early when the fixture reports S3 as unsupported.
 */
TEST_CASE("C API: ls_recursive throwing callback", "[vfs][ls-recursive]") {
  using tiledb::sm::LsObjects;
  S3Test s3_test({10, 50});
  if (!s3_test.is_supported()) {
    return;
  }

  vfs_config vfs_config;
  // REQUIRE rather than CHECK: continuing after a failed allocation would pass
  // an unusable handle to the call under test.
  tiledb_ctx_t* ctx = nullptr;
  REQUIRE(tiledb_ctx_alloc(vfs_config.config, &ctx) == TILEDB_OK);
  tiledb_vfs_t* vfs = nullptr;
  REQUIRE(tiledb_vfs_alloc(ctx, vfs_config.config, &vfs) == TILEDB_OK);

  LsObjects data;
  // The callback throws unconditionally and never writes to `data`.
  tiledb_ls_callback_t cb =
      [](const char*, size_t, uint64_t, void*) -> int32_t {
    throw std::runtime_error("Throwing callback");
  };

  CHECK(
      tiledb_vfs_ls_recursive(ctx, vfs, s3_test.temp_dir_.c_str(), cb, &data) ==
      TILEDB_ERR);
  CHECK(data.empty());

  // Release the C API handles allocated above.
  tiledb_vfs_free(&vfs);
  tiledb_ctx_free(&ctx);
}

tiledb/api/c_api/vfs/test/unit_capi_vfs.cc

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,3 +675,109 @@ TEST_CASE(
675675

676676
CHECK(error != nullptr);
677677
}
678+
679+
// Checks the status reported by tiledb_vfs_ls_recursive when exactly one of
// its arguments is null. Each SECTION nulls a different argument.
TEST_CASE("C API: tiledb_vfs_ls_recursive argument validation", "[capi][vfs]") {
680+
/*
681+
* No "success" sections here; too much overhead to set up.
682+
*/
683+
ordinary_vfs x;
684+
// Only the address of `data` is handed to the API in this test.
int32_t data;
685+
// Stub callback: ignores its arguments and returns 0.
auto cb = [](const char*, size_t, uint64_t, void*) { return 0; };
686+
SECTION("null context") {
687+
auto rc{tiledb_vfs_ls_recursive(nullptr, x.vfs, TEST_URI, cb, &data)};
688+
// A null context is expected to yield TILEDB_INVALID_CONTEXT, unlike the
// other null arguments below, which are expected to yield TILEDB_ERR.
CHECK(tiledb_status(rc) == TILEDB_INVALID_CONTEXT);
689+
}
690+
SECTION("null vfs") {
691+
auto rc{tiledb_vfs_ls_recursive(x.ctx, nullptr, TEST_URI, cb, &data)};
692+
CHECK(tiledb_status(rc) == TILEDB_ERR);
693+
}
694+
SECTION("null uri") {
695+
auto rc{tiledb_vfs_ls_recursive(x.ctx, x.vfs, nullptr, cb, &data)};
696+
CHECK(tiledb_status(rc) == TILEDB_ERR);
697+
}
698+
SECTION("null callback") {
699+
auto rc{tiledb_vfs_ls_recursive(x.ctx, x.vfs, TEST_URI, nullptr, &data)};
700+
CHECK(tiledb_status(rc) == TILEDB_ERR);
701+
}
702+
SECTION("null data ptr") {
703+
auto rc{tiledb_vfs_ls_recursive(x.ctx, x.vfs, TEST_URI, cb, nullptr)};
704+
CHECK(tiledb_status(rc) == TILEDB_ERR);
705+
}
706+
}
707+
708+
// Expects tiledb_vfs_ls_recursive to return TILEDB_ERR for each non-S3 URI
// scheme produced by GENERATE below. The test body is re-run once per
// generated URI.
TEST_CASE(
709+
"C API: VFS recursive ls unsupported backends",
710+
"[capi][vfs][ls-recursive]") {
711+
ordinary_vfs vfs;
712+
// Only the address of `ls_data` is handed to the API in this test.
int ls_data;
713+
// Stub callback: ignores its arguments and returns 0.
auto cb = [](const char*, size_t, uint64_t, void*) { return 0; };
714+
// Recursive ls is currently only supported for S3.
715+
tiledb::sm::URI uri{GENERATE(
716+
"file:///path/",
717+
"mem:///path/",
718+
"azure://path/",
719+
"gcs://path/",
720+
"hdfs://path/")};
721+
// NOTE(review): the section name below spells "unsupported" as "usupported".
// It is a runtime string (it appears in test output and name filters), so it
// is left unchanged here.
DYNAMIC_SECTION(
722+
"Test recursive ls usupported backend over " << uri.backend_name()) {
723+
// Skip schemes this VFS reports as unsupported, so the check below only
// runs for schemes the VFS accepts.
if (!vfs.vfs->vfs()->supports_uri_scheme(uri)) {
724+
return;
725+
}
726+
CHECK(
727+
tiledb_vfs_ls_recursive(vfs.ctx, vfs.vfs, uri.c_str(), cb, &ls_data) ==
728+
TILEDB_ERR);
729+
}
730+
}
731+
732+
// Exercises tiledb::sm::CallbackWrapper::operator() with a callback that has
// three outcomes: throw, return 0, or record the entry and return 1.
TEST_CASE(
733+
"C API: CallbackWrapper operator() validation",
734+
"[ls-recursive][callback][wrapper]") {
735+
tiledb::sm::LsObjects data;
736+
// The size check comes first, so an oversized object throws even when its
// path does not end in ".txt".
auto cb = [](const char* path,
737+
size_t path_len,
738+
uint64_t object_size,
739+
void* data) -> int32_t {
740+
if (object_size > 100) {
741+
// Throw if object size is greater than 100 bytes.
742+
throw std::runtime_error("Throwing callback");
743+
} else if (!std::string(path, path_len).ends_with(".txt")) {
744+
// Reject non-txt files.
745+
return 0;
746+
}
747+
auto* ls_data = static_cast<tiledb::sm::LsObjects*>(data);
748+
ls_data->push_back({{path, path_len}, object_size});
749+
return 1;
750+
};
751+
tiledb::sm::CallbackWrapper wrapper(cb, &data);
752+
753+
SECTION("Callback return 1 signals to continue traversal") {
754+
// ".txt" path of size 10: the callback records it and returns 1.
CHECK(wrapper("file.txt", 10) == 1);
755+
CHECK(data.size() == 1);
756+
}
757+
SECTION("Callback return 0 signals to stop traversal") {
758+
// Non-".txt" path: the callback returns 0, and the wrapper is expected to
// surface that as an LsStopTraversal exception.
CHECK_THROWS_AS(wrapper("some/dir/", 0) == 0, tiledb::sm::LsStopTraversal);
759+
}
760+
SECTION("Callback exception is propagated") {
761+
// Size 101 exceeds the callback's 100-byte limit, so the callback throws.
CHECK_THROWS_WITH(wrapper("path", 101) == 0, "Throwing callback");
762+
}
763+
}
764+
765+
// Checks which argument combinations tiledb::sm::CallbackWrapper's
// constructor accepts: it is expected to throw when the callback, the data
// pointer, or both are null, and not to throw when both are provided.
TEST_CASE(
766+
"C API: CallbackWrapper construction validation",
767+
"[ls-recursive][callback][wrapper]") {
768+
using tiledb::sm::CallbackWrapper;
769+
tiledb::sm::LsObjects data;
770+
// Stub callback: ignores its arguments and returns 1.
auto cb = [](const char*, size_t, uint64_t, void*) -> int32_t { return 1; };
771+
SECTION("Null callback") {
772+
CHECK_THROWS(CallbackWrapper(nullptr, &data));
773+
}
774+
SECTION("Null data") {
775+
CHECK_THROWS(CallbackWrapper(cb, nullptr));
776+
}
777+
SECTION("Null callback and data") {
778+
CHECK_THROWS(CallbackWrapper(nullptr, nullptr));
779+
}
780+
SECTION("Valid callback and data") {
781+
CHECK_NOTHROW(CallbackWrapper(cb, &data));
782+
}
783+
}

tiledb/api/c_api/vfs/vfs_api.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
*/
3232

3333
#include "tiledb/api/c_api_support/c_api_support.h"
34+
#include "vfs_api_experimental.h"
3435
#include "vfs_api_internal.h"
3536

3637
namespace tiledb::api {
@@ -311,6 +312,23 @@ capi_return_t tiledb_vfs_touch(tiledb_vfs_t* vfs, const char* uri) {
311312
return TILEDB_OK;
312313
}
313314

315+
/**
 * Validates the arguments, then delegates the recursive listing to the VFS
 * handle.
 *
 * @param vfs Handle checked with ensure_vfs_is_valid.
 * @param path Location to list; wrapped in a tiledb::sm::URI. Must not be
 *     null.
 * @param callback Function forwarded to ls_recursive. Must not be null.
 * @param data Pointer forwarded to ls_recursive; checked with
 *     ensure_output_pointer_is_valid.
 * @return TILEDB_OK once ls_recursive has returned.
 * @throws CAPIStatusException if `path` or `callback` is null.
 */
capi_return_t tiledb_vfs_ls_recursive(
    tiledb_vfs_t* vfs,
    const char* path,
    tiledb_ls_callback_t callback,
    void* data) {
  // The checks run in the same order as the parameters, so the first invalid
  // argument determines which error is raised.
  ensure_vfs_is_valid(vfs);
  if (!path) {
    throw CAPIStatusException("Invalid TileDB object: VFS passed a null path.");
  }
  if (!callback) {
    throw CAPIStatusException(
        "Invalid TileDB object: Callback function is null.");
  }
  ensure_output_pointer_is_valid(data);

  vfs->ls_recursive(tiledb::sm::URI(path), callback, data);
  return TILEDB_OK;
}
331+
314332
} // namespace tiledb::api
315333

316334
using tiledb::api::api_entry_context;
@@ -544,3 +562,14 @@ CAPI_INTERFACE(
544562
vfs_touch, tiledb_ctx_t* ctx, tiledb_vfs_t* vfs, const char* uri) {
545563
return api_entry_context<tiledb::api::tiledb_vfs_touch>(ctx, vfs, uri);
546564
}
565+
566+
// C API entry point for vfs_ls_recursive. Forwards every argument unchanged
// to tiledb::api::tiledb_vfs_ls_recursive through api_entry_context and
// returns its result.
CAPI_INTERFACE(
567+
vfs_ls_recursive,
568+
tiledb_ctx_t* ctx,
569+
tiledb_vfs_t* vfs,
570+
const char* path,
571+
tiledb_ls_callback_t callback,
572+
void* data) {
573+
return api_entry_context<tiledb::api::tiledb_vfs_ls_recursive>(
574+
ctx, vfs, path, callback, data);
575+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/**
2+
* @file tiledb/api/c_api/vfs/vfs_api_experimental.h
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2023 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
* This file declares the experimental VFS C API for TileDB.
31+
*/
32+
33+
#ifndef TILEDB_VFS_API_EXPERIMENTAL_H
34+
#define TILEDB_VFS_API_EXPERIMENTAL_H
35+
36+
#include "tiledb/api/c_api/api_external_common.h"
37+
#include "tiledb/api/c_api/context/context_api_external.h"
38+
#include "tiledb/api/c_api/vfs/vfs_api_external.h"
39+
40+
#ifdef __cplusplus
41+
extern "C" {
42+
#endif
43+
44+
/**
45+
* Typedef for ls_recursive callback function invoked on each object collected.
46+
*
47+
* @param path The absolute path of the visited object.
48+
* @param path_len The length of the path.
49+
* @param object_size The size of the object at the current path.
50+
* @param data Data passed to the callback used to store collected results.
* @return `1` to continue traversal, `0` to stop traversal, or `-1` on error.
51+
*/
52+
typedef int32_t (*tiledb_ls_callback_t)(
53+
const char* path, size_t path_len, uint64_t object_size, void* data);
54+
55+
/**
56+
* Visits the children of `path` recursively, invoking the callback for each
57+
* entry. The callback should return 1 to continue traversal, 0 to stop, or -1
58+
* on error. The callback is responsible for writing gathered entries into the
59+
* `data` buffer, for example using a pointer to a user-defined struct.
60+
*
61+
* Currently only S3 is supported, and the `path` must be a valid S3 URI.
62+
*
63+
* **Example:**
64+
*
65+
* @code{.c}
66+
* int32_t my_callback(
67+
* const char* path, size_t path_length, uint64_t file_size, void* data) {
68+
* MyCbStruct* cb_data = (MyCbStruct*)data;
69+
* // Perform custom callback behavior here.
70+
* return 1; // Continue traversal to next entry.
71+
* }
72+
* MyCbStruct* cb_data = allocate_cb_struct();
73+
*
74+
* tiledb_vfs_ls_recursive(ctx, vfs, "s3://bucket/foo", my_callback, cb_data);
75+
* @endcode
76+
*
77+
* @param[in] ctx The TileDB context.
78+
* @param[in] vfs The virtual filesystem object.
79+
* @param[in] path The path in which the traversal will occur.
80+
* @param[in] callback
81+
* The callback function to be applied on every visited object.
82+
* The callback should return `0` if the iteration must stop, and `1`
83+
* if the iteration must continue. It takes as input the currently visited
84+
* path, the length of the currently visited path, the size of the file, and
85+
* the user-provided `data` pointer, typically a pointer to a struct that
86+
* stores paths and object sizes. The callback returns `-1` upon error. Note
87+
* that `path` in the callback will be an **absolute** path.
88+
* @param[in] data Data pointer passed into the callback for storing results.
* Must not be null.
89+
* @return `TILEDB_OK` for success and `TILEDB_ERR` for error.
90+
*/
91+
TILEDB_EXPORT capi_return_t tiledb_vfs_ls_recursive(
92+
tiledb_ctx_t* ctx,
93+
tiledb_vfs_t* vfs,
94+
const char* path,
95+
tiledb_ls_callback_t callback,
96+
void* data) TILEDB_NOEXCEPT;
97+
98+
#ifdef __cplusplus
99+
}
100+
#endif
101+
102+
#endif // TILEDB_VFS_API_EXPERIMENTAL_H

0 commit comments

Comments
 (0)