@@ -50,66 +50,12 @@ class ICEBERG_EXPORT Reader {
5050
5151 // / \brief Read next data from file.
5252 // /
53- // / \return std::monostate if the reader has no more data, otherwise `ArrowArray` or
54- // / `StructLike` depending on the data layout by the reader implementation.
55- using Data =
56- std::variant<std::monostate, ArrowArray, std::reference_wrapper<const StructLike>>;
53+ // / \return std::monostate if the reader has no more data, otherwise `ArrowArray`.
54+ using Data = std::variant<std::monostate, ArrowArray>;
5755 virtual Result<Data> Next () = 0;
5856
59- enum class DataLayout { kArrowArray , kStructLike };
60-
61- // / \brief Get the data layout returned by `Next()` of the reader.
62- virtual DataLayout data_layout () const = 0;
63- };
64-
65- // / \brief Wrapper of `Reader` to always return `StructLike`.
66- // /
67- // / If the data layout of the wrapped reader is `ArrowArray`, the data will be converted
68- // / to `StructLike`; otherwise, the data will be returned as is without any cost.
69- class ICEBERG_EXPORT StructLikeReader : public Reader {
70- public:
71- explicit StructLikeReader (std::unique_ptr<Reader> reader);
72-
73- ~StructLikeReader () override = default ;
74-
75- // / \brief Always read data into `StructLike` or monostate if no more data.
76- Result<Data> Next () final ;
77-
78- DataLayout data_layout () const final { return DataLayout::kStructLike ; }
79-
80- Status Open (const struct ReaderOptions & options) final {
81- return reader_->Open (options);
82- }
83-
84- Status Close () final { return reader_->Close (); }
85-
86- private:
87- std::unique_ptr<Reader> reader_;
88- };
89-
90- // / \brief Wrapper of `Reader` to always return `ArrowArray`.
91- // /
92- // / If the data layout of the wrapped reader is `StructLike`, the data will be converted
93- // / to `ArrowArray`; otherwise, the data will be returned as is without any cost.
94- class ICEBERG_EXPORT BatchReader : public Reader {
95- public:
96- explicit BatchReader (std::unique_ptr<Reader> reader);
97-
98- ~BatchReader () override = default ;
99-
100- // / \brief Always read data into `ArrowArray` or monostate if no more data.
101- Result<Data> Next () final ;
102-
103- DataLayout data_layout () const final { return DataLayout::kArrowArray ; }
104-
105- Status Open (const struct ReaderOptions & options) final {
106- return reader_->Open (options);
107- }
108-
109- Status Close () final { return reader_->Close (); }
110-
111- private:
112- std::unique_ptr<Reader> reader_;
57+ // / \brief Get the schema of the data.
58+ virtual Result<ArrowSchema> Schema () = 0;
11359};
11460
11561// / \brief A split of the file to read.
@@ -130,12 +76,12 @@ struct ICEBERG_EXPORT ReaderOptions {
13076 std::optional<Split> split;
13177 // / \brief The batch size to read. Only applies to implementations that support
13278 // / batching.
133- int64_t batch_size;
79+ int64_t batch_size = 4096 ;
13480 // / \brief FileIO instance to open the file. Reader implementations should down cast it
13581 // / to the specific FileIO implementation. By default, the `iceberg-bundle` library uses
13682 // / `ArrowFileSystemFileIO` as the default implementation.
13783 std::shared_ptr<class FileIO > io;
138- // / \brief The projection schema to read from the file.
84+ // / \brief The projection schema to read from the file. This field is required.
13985 std::shared_ptr<class Schema > projection;
14086 // / \brief The filter to apply to the data. Reader implementations may ignore this if
14187 // / the file format does not support filtering.
@@ -160,9 +106,4 @@ struct ICEBERG_EXPORT ReaderFactoryRegistry {
160106 const ReaderOptions& options);
161107};
162108
163- // / \brief Macro to register a reader factory for a specific file format.
164- #define ICEBERG_REGISTER_READER_FACTORY (format_type, reader_factory ) \
165- static ::iceberg::ReaderFactoryRegistry register_reader_factory_##format_type( \
166- ::iceberg::FileFormatType::k##format_type, reader_factory);
167-
168109} // namespace iceberg
0 commit comments