Skip to content

Commit 90fe593

Browse files
author
xiao.dong
committed
feat: add fileWriter and manifest writer interface define
1 parent 3d617a8 commit 90fe593

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

src/iceberg/file_writer.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/file_writer.h
23+
/// Writer interface for file formats like Parquet, Avro and ORC.
24+
25+
#include <functional>
26+
#include <memory>
27+
#include <optional>
28+
29+
#include "iceberg/arrow_c_data.h"
30+
#include "iceberg/file_format.h"
31+
#include "iceberg/result.h"
32+
#include "iceberg/type_fwd.h"
33+
34+
namespace iceberg {
35+
36+
/// \brief Options for creating a writer.
///
/// An instance is passed to Writer::Open() and to WriterFactoryRegistry::Open().
37+
struct ICEBERG_EXPORT WriterOptions {
38+
/// \brief The path to the file to write.
39+
std::string path;
40+
/// \brief The schema of the data to write.
///
/// NOTE(review): ArrowSchema is stored by value here; who is responsible for
/// releasing it is not specified in this header -- confirm with implementations.
41+
ArrowSchema schema;
42+
/// \brief FileIO instance used to open the file. Writer implementations should
43+
/// downcast it to the specific FileIO implementation they support. By default, the
44+
/// `iceberg-bundle` library uses `ArrowFileSystemFileIO`.
45+
std::shared_ptr<class FileIO> io;
46+
/// \brief Format-specific or implementation-specific properties, as string
/// key/value pairs.
47+
std::unordered_map<std::string, std::string> properties;
48+
};
49+
50+
/// \brief Base writer class to write data to different file formats.
///
/// This is an abstract interface: Open(), Close() and Write() are pure virtual.
/// Copy construction and copy assignment are disabled.
51+
class ICEBERG_EXPORT Writer {
52+
public:
53+
virtual ~Writer() = default;
54+
Writer() = default;
55+
Writer(const Writer&) = delete;
56+
Writer& operator=(const Writer&) = delete;
57+
58+
/// \brief Open the writer.
///
/// \param options Options (path, schema, FileIO, properties) for the writer.
/// \return Status of the open operation.
59+
virtual Status Open(const struct WriterOptions& options) = 0;
60+
61+
/// \brief Close the writer.
///
/// \return Status of the close operation.
62+
virtual Status Close() = 0;
63+
64+
/// \brief Write data to the file.
65+
///
/// \param data The Arrow array holding the data to write.
66+
/// \return Status of the write operation.
67+
virtual Status Write(const ArrowArray& data) = 0;
68+
};
69+
70+
/// \brief Factory function to create a writer of a specific file format.
///
/// The callable takes no arguments and returns either a newly created Writer or
/// an error.
71+
using WriterFactory = std::function<Result<std::unique_ptr<Writer>>()>;
72+
73+
/// \brief Registry of writer factories for different file formats.
74+
struct ICEBERG_EXPORT WriterFactoryRegistry {
75+
/// \brief Register a factory function for a specific file format.
///
/// \param format_type The file format the factory creates writers for.
/// \param factory The factory function to register.
76+
WriterFactoryRegistry(FileFormatType format_type, WriterFactory factory);
77+
78+
/// \brief Get the factory function for a specific file format.
///
/// \param format_type The file format to look up.
/// \return A reference to the factory for that format.
///
/// NOTE(review): the behavior when no factory has been registered for
/// `format_type` is not visible in this header -- confirm in the implementation.
79+
static WriterFactory& GetFactory(FileFormatType format_type);
80+
81+
/// \brief Open a writer for a specific file format.
///
/// \param format_type The file format of the writer to open.
/// \param options Options for the writer.
/// \return A Result containing the writer or an error.
82+
static Result<std::unique_ptr<Writer>> Open(FileFormatType format_type,
83+
const WriterOptions& options);
84+
};
85+
86+
} // namespace iceberg

src/iceberg/manifest_writer.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/manifest_writer.h
23+
/// Data writer interface for manifest files.
24+
25+
#include <memory>
26+
#include <vector>
27+
28+
#include "iceberg/file_writer.h"
29+
#include "iceberg/iceberg_export.h"
30+
#include "iceberg/type_fwd.h"
31+
32+
namespace iceberg {
33+
34+
/// \brief Write manifest entries to a manifest file.
35+
class ICEBERG_EXPORT ManifestWriter {
36+
public:
37+
virtual ~ManifestWriter() = default;
38+
/// \brief Write the given manifest entries.
///
/// \param entries The manifest entries to write.
/// \return Status of the write operation.
///
/// NOTE(review): this method is declared `const` even though it performs a
/// write -- confirm that this is intended.
virtual Status WriteManifestEntries(
39+
const std::vector<ManifestEntry>& entries) const = 0;
40+
41+
/// \brief Creates a writer for a manifest file.
42+
/// \param manifest_location Path to the manifest file.
43+
/// \param file_io File IO implementation to use.
/// \param partition_schema Partition schema for the manifest (presumably the
/// schema of the partition data in the entries -- TODO confirm).
44+
/// \return A Result containing the writer or an error.
45+
static Result<std::unique_ptr<ManifestWriter>> MakeWriter(
46+
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
47+
std::shared_ptr<Schema> partition_schema);
48+
};
49+
50+
/// \brief Write manifest files to a manifest list file.
51+
class ICEBERG_EXPORT ManifestListWriter {
52+
public:
53+
virtual ~ManifestListWriter() = default;
/// \brief Write the given manifest files.
///
/// \param files The manifest files to write.
/// \return Status of the write operation.
///
/// NOTE(review): this method is declared `const` even though it performs a
/// write -- confirm that this is intended.
54+
virtual Status WriteManifestFiles(const std::vector<ManifestFile>& files) const = 0;
55+
56+
/// \brief Creates a writer for the manifest list.
57+
/// \param manifest_list_location Path to the manifest list file.
58+
/// \param file_io File IO implementation to use.
59+
/// \return A Result containing the writer or an error.
60+
static Result<std::unique_ptr<ManifestListWriter>> MakeWriter(
61+
std::string_view manifest_list_location, std::shared_ptr<FileIO> file_io);
62+
};
63+
64+
} // namespace iceberg

0 commit comments

Comments
 (0)