Skip to content

Commit 1e963a0

Browse files
committed
feat(io): add bulk delete API to FileIO.
1 parent 16cde97 commit 1e963a0

6 files changed

Lines changed: 50 additions & 15 deletions

File tree

src/iceberg/arrow/arrow_io.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <limits>
2323
#include <mutex>
2424
#include <optional>
25+
#include <vector>
2526

2627
#include <arrow/buffer.h>
2728
#include <arrow/filesystem/localfs.h>
@@ -568,6 +569,18 @@ Status ArrowFileSystemFileIO::DeleteFile(const std::string& file_location) {
568569
return {};
569570
}
570571

572+
Status ArrowFileSystemFileIO::DeleteFiles(
573+
const std::vector<std::string>& file_locations) {
574+
std::vector<std::string> paths;
575+
paths.reserve(file_locations.size());
576+
for (const auto& file_location : file_locations) {
577+
ICEBERG_ASSIGN_OR_RAISE(auto path, ResolvePath(file_location));
578+
paths.push_back(std::move(path));
579+
}
580+
ICEBERG_ARROW_RETURN_NOT_OK(arrow_fs_->DeleteFiles(paths));
581+
return {};
582+
}
583+
571584
std::unique_ptr<FileIO> ArrowFileSystemFileIO::MakeMockFileIO() {
572585
return std::make_unique<ArrowFileSystemFileIO>(
573586
std::make_shared<::arrow::fs::internal::MockFileSystem>(

src/iceberg/arrow/arrow_io_internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <memory>
2424
#include <optional>
2525
#include <string>
26+
#include <vector>
2627

2728
#include <arrow/filesystem/type_fwd.h>
2829
#include <arrow/io/type_fwd.h>
@@ -77,6 +78,9 @@ class ICEBERG_BUNDLE_EXPORT ArrowFileSystemFileIO : public FileIO {
7778
/// \brief Delete a file at the given location.
7879
Status DeleteFile(const std::string& file_location) override;
7980

81+
/// \brief Delete files at the given locations.
82+
Status DeleteFiles(const std::vector<std::string>& file_locations) override;
83+
8084
/// \brief Get the Arrow file system.
8185
const std::shared_ptr<::arrow::fs::FileSystem>& fs() const { return arrow_fs_; }
8286

src/iceberg/file_io.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,11 @@ Status FileIO::WriteFile(const std::string& file_location, std::string_view cont
100100
return FinishWithCloseStatus(std::move(status), stream->Close());
101101
}
102102

103+
Status FileIO::DeleteFiles(const std::vector<std::string>& file_locations) {
104+
for (const auto& file_location : file_locations) {
105+
ICEBERG_RETURN_UNEXPECTED(DeleteFile(file_location));
106+
}
107+
return {};
108+
}
109+
103110
} // namespace iceberg

src/iceberg/file_io.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <span>
2727
#include <string>
2828
#include <string_view>
29+
#include <vector>
2930

3031
#include "iceberg/iceberg_export.h"
3132
#include "iceberg/result.h"
@@ -163,15 +164,7 @@ class ICEBERG_EXPORT FileIO {
163164
///
164165
/// \param file_locations The locations of the files to delete.
165166
/// \return void if all deletes succeeded, an error code if any delete failed.
166-
virtual Status DeleteFiles(std::span<const std::string> file_locations) {
167-
for (const auto& file_location : file_locations) {
168-
auto status = DeleteFile(file_location);
169-
if (!status.has_value()) {
170-
return status;
171-
}
172-
}
173-
return {};
174-
}
167+
virtual Status DeleteFiles(const std::vector<std::string>& file_locations);
175168
};
176169

177170
} // namespace iceberg

src/iceberg/test/arrow_io_test.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <array>
2222
#include <memory>
2323
#include <string>
24+
#include <vector>
2425

2526
#include <arrow/filesystem/localfs.h>
2627
#include <arrow/result.h>
@@ -341,6 +342,20 @@ TEST_F(LocalFileIOTest, DeleteFile) {
341342
EXPECT_THAT(del_res, HasErrorMessage("Cannot delete file"));
342343
}
343344

345+
TEST_F(LocalFileIOTest, DeleteFiles) {
346+
auto first_path = CreateNewTempFilePath();
347+
auto second_path = CreateNewTempFilePath();
348+
ASSERT_THAT(file_io_->WriteFile(first_path, "hello"), IsOk());
349+
ASSERT_THAT(file_io_->WriteFile(second_path, "world"), IsOk());
350+
351+
std::vector<std::string> paths = {first_path, second_path};
352+
EXPECT_THAT(file_io_->DeleteFiles(paths), IsOk());
353+
354+
EXPECT_THAT(file_io_->ReadFile(first_path, std::nullopt), IsError(ErrorKind::kIOError));
355+
EXPECT_THAT(file_io_->ReadFile(second_path, std::nullopt),
356+
IsError(ErrorKind::kIOError));
357+
}
358+
344359
void VerifyReadFullyReadsFromAbsolutePosition(const std::shared_ptr<FileIO>& file_io,
345360
const std::string& path) {
346361
ASSERT_THAT(file_io->WriteFile(path, "abcdef"), IsOk());

src/iceberg/test/file_io_test.cc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,18 @@
2727

2828
#include "iceberg/test/matchers.h"
2929

30+
namespace iceberg {
3031
namespace {
3132

32-
class RecordingFileIO : public iceberg::FileIO {
33+
class RecordingFileIO : public FileIO {
3334
public:
3435
explicit RecordingFileIO(std::string failure_path = "")
3536
: failure_path_(std::move(failure_path)) {}
3637

37-
iceberg::Status DeleteFile(const std::string& file_location) override {
38+
Status DeleteFile(const std::string& file_location) override {
3839
deleted_paths.push_back(file_location);
3940
if (file_location == failure_path_) {
40-
return iceberg::IOError("failed to delete {}", file_location);
41+
return IOError("failed to delete {}", file_location);
4142
}
4243
return {};
4344
}
@@ -54,7 +55,7 @@ TEST(FileIOTest, DeleteFilesFallsBackToDeleteFileForEachPath) {
5455
RecordingFileIO file_io;
5556
std::vector<std::string> paths = {"file-a.avro", "file-b.avro"};
5657

57-
EXPECT_THAT(file_io.DeleteFiles(paths), iceberg::IsOk());
58+
EXPECT_THAT(file_io.DeleteFiles(paths), IsOk());
5859
EXPECT_THAT(file_io.deleted_paths,
5960
::testing::ElementsAre("file-a.avro", "file-b.avro"));
6061
}
@@ -65,8 +66,10 @@ TEST(FileIOTest, DeleteFilesReturnsFirstDeleteFileError) {
6566

6667
auto status = file_io.DeleteFiles(paths);
6768

68-
EXPECT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kIOError));
69-
EXPECT_THAT(status, iceberg::HasErrorMessage("failed to delete file-b.avro"));
69+
EXPECT_THAT(status, IsError(ErrorKind::kIOError));
70+
EXPECT_THAT(status, HasErrorMessage("failed to delete file-b.avro"));
7071
EXPECT_THAT(file_io.deleted_paths,
7172
::testing::ElementsAre("file-a.avro", "file-b.avro"));
7273
}
74+
75+
} // namespace iceberg

0 commit comments

Comments
 (0)