From 04a68b8d5d3044e45a68c2d0f925dd9e7ddb3fd6 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Fri, 18 Jul 2025 10:35:17 +0800 Subject: [PATCH] feat: add in-memory FileIO backed by Arrow MockFileSystem --- src/iceberg/arrow/arrow_fs_file_io.cc | 13 ++++++++++++- ...row_fs_file_io.h => arrow_fs_file_io_internal.h} | 6 ++++++ src/iceberg/avro/avro_reader.cc | 2 +- test/arrow_fs_file_io_test.cc | 3 +-- test/avro_test.cc | 2 +- test/gzip_decompress_test.cc | 2 +- test/manifest_list_reader_test.cc | 2 +- test/metadata_io_test.cc | 2 +- 8 files changed, 24 insertions(+), 8 deletions(-) rename src/iceberg/arrow/{arrow_fs_file_io.h => arrow_fs_file_io_internal.h} (87%) diff --git a/src/iceberg/arrow/arrow_fs_file_io.cc b/src/iceberg/arrow/arrow_fs_file_io.cc index 535230954..5c1523eca 100644 --- a/src/iceberg/arrow/arrow_fs_file_io.cc +++ b/src/iceberg/arrow/arrow_fs_file_io.cc @@ -17,11 +17,13 @@ * under the License. */ -#include "iceberg/arrow/arrow_fs_file_io.h" +#include #include +#include #include "iceberg/arrow/arrow_error_transform_internal.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" namespace iceberg::arrow { @@ -67,4 +69,13 @@ Status ArrowFileSystemFileIO::DeleteFile(const std::string& file_location) { return {}; } +std::unique_ptr<::arrow::fs::FileSystem> ArrowFileSystemFileIO::MakeMockFileIO() { + return std::make_unique<::arrow::fs::internal::MockFileSystem>( + std::chrono::system_clock::now()); +} + +std::unique_ptr<::arrow::fs::FileSystem> ArrowFileSystemFileIO::MakeLocalFileIO() { + return std::make_unique<::arrow::fs::LocalFileSystem>(); +} + } // namespace iceberg::arrow diff --git a/src/iceberg/arrow/arrow_fs_file_io.h b/src/iceberg/arrow/arrow_fs_file_io_internal.h similarity index 87% rename from src/iceberg/arrow/arrow_fs_file_io.h rename to src/iceberg/arrow/arrow_fs_file_io_internal.h index e187c89d2..4f8509d93 100644 --- a/src/iceberg/arrow/arrow_fs_file_io.h +++ b/src/iceberg/arrow/arrow_fs_file_io_internal.h @@ -34,6 +34,12 @@ class ICEBERG_BUNDLE_EXPORT ArrowFileSystemFileIO : public FileIO { explicit ArrowFileSystemFileIO(std::shared_ptr<::arrow::fs::FileSystem> arrow_fs) : arrow_fs_(std::move(arrow_fs)) {} + /// \brief Make an in-memory FileIO backed by arrow::fs::internal::MockFileSystem. + static std::unique_ptr<::arrow::fs::FileSystem> MakeMockFileIO(); + + /// \brief Make a local FileIO backed by arrow::fs::LocalFileSystem. + static std::unique_ptr<::arrow::fs::FileSystem> MakeLocalFileIO(); + ~ArrowFileSystemFileIO() override = default; /// \brief Read the content of the file at the given location. diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index 29b42b79b..19bc69df0 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -31,7 +31,7 @@ #include #include -#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_data_util_internal.h" #include "iceberg/avro/avro_schema_util_internal.h" #include "iceberg/avro/avro_stream_internal.h" diff --git a/test/arrow_fs_file_io_test.cc b/test/arrow_fs_file_io_test.cc index ab382b39e..f1abf8d6d 100644 --- a/test/arrow_fs_file_io_test.cc +++ b/test/arrow_fs_file_io_test.cc @@ -17,11 +17,10 @@ * under the License. */ -#include "iceberg/arrow/arrow_fs_file_io.h" - #include #include +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "matchers.h" #include "temp_file_test_base.h" diff --git a/test/avro_test.cc b/test/avro_test.cc index ff39ebbf9..fe04f9bf6 100644 --- a/test/avro_test.cc +++ b/test/avro_test.cc @@ -29,7 +29,7 @@ #include #include -#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_reader.h" #include "iceberg/schema.h" #include "iceberg/type.h" diff --git a/test/gzip_decompress_test.cc b/test/gzip_decompress_test.cc index 5edb36619..1325ba286 100644 --- a/test/gzip_decompress_test.cc +++ b/test/gzip_decompress_test.cc @@ -23,7 +23,7 @@ #include #include -#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/file_io.h" #include "iceberg/util/gzip_internal.h" #include "matchers.h" diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index f825a589d..d2850efa3 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -21,7 +21,7 @@ #include #include -#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_reader.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader.h" diff --git a/test/metadata_io_test.cc b/test/metadata_io_test.cc index c50f6a10b..1590af4d9 100644 --- a/test/metadata_io_test.cc +++ b/test/metadata_io_test.cc @@ -24,7 +24,7 @@ #include #include -#include "iceberg/arrow/arrow_fs_file_io.h" +#include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/file_io.h" #include "iceberg/json_internal.h" #include "iceberg/schema.h"