Skip to content

Commit 4797c93

Browse files
authored
Add support for copying directories across filesystems. (#5666)
Add support for copying directories across filesystems. Note that directory content will be copied _serially_, as listed by `ls_filtered_v2`. --- TYPE: FEATURE DESC: Add support for copying directories across filesystems. --- Resolves CORE-204.
1 parent 0cbb47e commit 4797c93

File tree

2 files changed

+129
-1
lines changed

2 files changed

+129
-1
lines changed

test/src/unit-vfs.cc

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,115 @@ TEST_CASE("VFS: copy_file", "[vfs][copy_file]") {
280280
}
281281
}
282282

283+
// Verifies that VFS::copy_dir copies a nested directory tree from the local
// filesystem to S3 and that every copied file has the same contents as its
// source. The test is a no-op when the S3 backend is not supported.
TEST_CASE("VFS: copy_dir", "[vfs][copy_dir]") {
  LocalFsTest src_fs({});
  S3Test dst_fs({});
  if (!dst_fs.is_supported()) {
    return;
  }
  URI src_path = src_fs.temp_dir_.add_trailing_slash();
  URI dst_path = dst_fs.temp_dir_.add_trailing_slash();

  ThreadPool compute_tp(4);
  ThreadPool io_tp(4);
  Config config = set_config_params();
  VFS vfs{
      &g_helper_stats, g_helper_logger().get(), &compute_tp, &io_tp, config};

  /* Create the following file hierarchy:
   *
   * src_path/file1
   * src_path/dir2/file2
   * src_path/dir3/subdir/file3
   */
  auto file1 = URI(src_path.to_string() + "file1");
  auto dir2 = URI(src_path.to_string() + "dir2/");
  auto file2 = URI(dir2.to_string() + "file2");
  auto dir3 = URI(src_path.to_string() + "dir3/");
  auto subdir = URI(dir3.to_string() + "subdir/");
  auto file3 = URI(subdir.to_string() + "file3");
  REQUIRE_NOTHROW(vfs.touch(file1));
  REQUIRE_NOTHROW(vfs.create_dir(URI(dir2)));
  REQUIRE_NOTHROW(vfs.touch(file2));
  REQUIRE_NOTHROW(vfs.create_dir(URI(dir3)));
  // Create the nested directory that holds file3 (previously dir3 was created
  // a second time and subdir was never created).
  REQUIRE_NOTHROW(vfs.create_dir(URI(subdir)));
  REQUIRE_NOTHROW(vfs.touch(file3));

  // Write some random test data to file1, file2, file3. The buffer is
  // reshuffled between writes so that each file has different contents.
  size_t test_str_size = 10 * 1048576;  // 10 MB
  std::string test_str;
  test_str.reserve(test_str_size);
  std::random_device rd;
  std::mt19937 gen(rd());
  const std::string test_chars = "abcdefghijklmnopqrstuvwxyz";
  std::uniform_int_distribution<size_t> dist(0, test_chars.size() - 1);
  for (size_t i = 0; i < test_str_size; ++i) {
    test_str += test_chars[dist(gen)];
  }
  REQUIRE(test_str.size() == test_str_size);
  REQUIRE_NOTHROW(vfs.write(file1, test_str.data(), test_str_size));
  require_tiledb_ok(vfs.close_file(file1));
  std::shuffle(test_str.begin(), test_str.end(), gen);
  REQUIRE_NOTHROW(vfs.write(file2, test_str.data(), test_str_size));
  require_tiledb_ok(vfs.close_file(file2));
  std::shuffle(test_str.begin(), test_str.end(), gen);
  REQUIRE_NOTHROW(vfs.write(file3, test_str.data(), test_str_size));
  require_tiledb_ok(vfs.close_file(file3));

  // Copy the source directory to the destination.
  REQUIRE_NOTHROW(vfs.copy_dir(src_path, dst_path));
  CHECK(vfs.is_dir(src_path));
  CHECK(vfs.is_dir(dst_path));
  auto dst_file1 = URI(dst_path.to_string() + "file1");
  auto dst_file2 = URI(dst_path.to_string() + "dir2/file2");
  auto dst_file3 = URI(dst_path.to_string() + "dir3/subdir/file3");

  // Validate the file contents are the same.
  if (test_str_size > 0) {
    // The read buffers must be *sized*, not merely reserved: with reserve()
    // alone size() stays 0, the reads write past the logical end of the
    // string, and the equality checks below compare two empty strings.
    std::string src_str(test_str_size, '\0');
    std::string dst_str(test_str_size, '\0');

    CHECK(vfs.is_file(dst_file1));
    require_tiledb_ok(
        vfs.read_exactly(file1, 0, src_str.data(), test_str_size));
    require_tiledb_ok(
        vfs.read_exactly(dst_file1, 0, dst_str.data(), test_str_size));
    CHECK(src_str == dst_str);

    CHECK(vfs.is_file(dst_file2));
    require_tiledb_ok(
        vfs.read_exactly(file2, 0, src_str.data(), test_str_size));
    require_tiledb_ok(
        vfs.read_exactly(dst_file2, 0, dst_str.data(), test_str_size));
    CHECK(src_str == dst_str);

    CHECK(vfs.is_file(dst_file3));
    require_tiledb_ok(
        vfs.read_exactly(file3, 0, src_str.data(), test_str_size));
    require_tiledb_ok(
        vfs.read_exactly(dst_file3, 0, dst_str.data(), test_str_size));
    CHECK(src_str == dst_str);
  }

  // Clean up. Object stores are removed as buckets, everything else as a
  // directory.
  if (src_path.is_gcs() || src_path.is_s3() || src_path.is_azure()) {
    REQUIRE_NOTHROW(vfs.remove_bucket(src_path));
    REQUIRE(!vfs.is_bucket(src_path));
  } else {
    REQUIRE_NOTHROW(vfs.remove_dir(src_path));
    REQUIRE(!vfs.is_dir(src_path));
  }
  if (dst_path.is_gcs() || dst_path.is_s3() || dst_path.is_azure()) {
    REQUIRE_NOTHROW(vfs.remove_bucket(dst_path));
    REQUIRE(!vfs.is_bucket(dst_path));
  } else {
    REQUIRE_NOTHROW(vfs.remove_dir(dst_path));
    REQUIRE(!vfs.is_dir(dst_path));
  }
}
391+
283392
// Tuple of backend fixture types passed as the type list to the
// TEMPLATE_LIST_TEST_CASE that follows.
using AllBackends = std::tuple<LocalFsTest, GCSTest, GSTest, S3Test, AzureTest>;
284393
TEMPLATE_LIST_TEST_CASE(
285394
"VFS: URI semantics and file management", "[vfs][uri]", AllBackends) {

tiledb/sm/filesystem/vfs.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,10 +481,29 @@ void VFS::copy_dir(const URI& old_uri, const URI& new_uri) {
481481
auto instrument = make_log_duration_instrument(old_uri, new_uri);
482482
auto& old_fs = get_fs(old_uri);
483483
auto& new_fs = get_fs(new_uri);
484+
auto& src_parent_path = old_uri.to_string();
485+
auto& dst_parent_path = new_uri.to_string();
486+
484487
if (&old_fs == &new_fs) {
485488
old_fs.copy_dir(old_uri, new_uri);
486489
} else {
487-
throw UnsupportedOperation("copy_dir");
490+
// Recursively list and copy all source files
491+
ResultFilterV2 result_filter =
492+
[](const std::string_view&, uint64_t, bool is_dir) {
493+
if (is_dir) {
494+
return false; // filter out directories.
495+
}
496+
return true;
497+
};
498+
auto paths = old_fs.ls_filtered_v2(old_uri, result_filter, true);
499+
500+
for (auto& path : paths) {
501+
auto old_path = URI(path.first);
502+
auto new_path =
503+
URI(dst_parent_path + path.first.substr(src_parent_path.size()));
504+
// Copy files across filesystems.
505+
copy_file(old_path, new_path);
506+
}
488507
}
489508
}
490509

0 commit comments

Comments
 (0)