Skip to content

Commit 7d9e994

Browse files
authored
Merge branch 'apache:main' into main
2 parents 50d9092 + a13eed2 commit 7d9e994

File tree

16 files changed

+933
-122
lines changed

16 files changed

+933
-122
lines changed

.github/workflows/cpp-linter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
with:
4848
style: file
4949
tidy-checks: ''
50-
version: 19
50+
version: 22
5151
files-changed-only: true
5252
lines-changed-only: true
5353
thread-comments: true

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ repos:
3030
- id: check-added-large-files
3131

3232
- repo: https://github.com/pre-commit/mirrors-clang-format
33-
rev: v19.1.5
33+
rev: v20.1.8
3434
hooks:
3535
- id: clang-format
3636
exclude: ^test/resources/.*\.json$

src/iceberg/catalog.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,15 +176,6 @@ class ICEBERG_EXPORT Catalog {
176176
virtual Result<std::shared_ptr<Table>> RegisterTable(
177177
const TableIdentifier& identifier, const std::string& metadata_file_location) = 0;
178178

179-
/// \brief Instantiate a builder to either create a table or start a create/replace
180-
/// transaction
181-
///
182-
/// \param identifier a table identifier
183-
/// \param schema a schema
184-
/// \return the builder to create a table or start a create/replace transaction
185-
virtual std::unique_ptr<class TableBuilder> BuildTable(
186-
const TableIdentifier& identifier, const Schema& schema) const = 0;
187-
188179
/// \brief A builder used to create valid tables or start create/replace transactions
189180
class TableBuilder {
190181
public:
@@ -233,6 +224,15 @@ class ICEBERG_EXPORT Catalog {
233224
/// \return the Transaction to create the table
234225
virtual std::unique_ptr<Transaction> StageCreate() = 0;
235226
};
227+
228+
/// \brief Instantiate a builder to either create a table or start a create/replace
229+
/// transaction
230+
///
231+
/// \param identifier a table identifier
232+
/// \param schema a schema
233+
/// \return the builder to create a table or start a create/replace transaction
234+
virtual std::unique_ptr<TableBuilder> BuildTable(const TableIdentifier& identifier,
235+
const Schema& schema) const = 0;
236236
};
237237

238238
} // namespace iceberg

src/iceberg/catalog/in_memory_catalog.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ Result<std::shared_ptr<Table>> InMemoryCatalog::RegisterTable(
441441
return LoadTable(identifier);
442442
}
443443

444-
std::unique_ptr<TableBuilder> InMemoryCatalog::BuildTable(
444+
std::unique_ptr<Catalog::TableBuilder> InMemoryCatalog::BuildTable(
445445
const TableIdentifier& identifier, const Schema& schema) const {
446446
throw IcebergError("not implemented");
447447
}

src/iceberg/catalog/in_memory_catalog.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class ICEBERG_EXPORT InMemoryCatalog
9090
const TableIdentifier& identifier,
9191
const std::string& metadata_file_location) override;
9292

93-
std::unique_ptr<iceberg::TableBuilder> BuildTable(const TableIdentifier& identifier,
93+
std::unique_ptr<Catalog::TableBuilder> BuildTable(const TableIdentifier& identifier,
9494
const Schema& schema) const override;
9595

9696
private:

src/iceberg/expression/literal.cc

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include <cmath>
2323
#include <concepts>
24-
#include <sstream>
2524

2625
#include "iceberg/exception.h"
2726

@@ -126,22 +125,28 @@ Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
126125
: value_(std::move(value)), type_(std::move(type)) {}
127126

128127
// Factory methods
129-
Literal Literal::Boolean(bool value) { return {Value{value}, iceberg::boolean()}; }
128+
Literal Literal::Boolean(bool value) { return {Value{value}, boolean()}; }
130129

131-
Literal Literal::Int(int32_t value) { return {Value{value}, iceberg::int32()}; }
130+
Literal Literal::Int(int32_t value) { return {Value{value}, int32()}; }
132131

133-
Literal Literal::Long(int64_t value) { return {Value{value}, iceberg::int64()}; }
132+
Literal Literal::Date(int32_t value) { return {Value{value}, date()}; }
134133

135-
Literal Literal::Float(float value) { return {Value{value}, iceberg::float32()}; }
134+
Literal Literal::Long(int64_t value) { return {Value{value}, int64()}; }
136135

137-
Literal Literal::Double(double value) { return {Value{value}, iceberg::float64()}; }
136+
Literal Literal::Time(int64_t value) { return {Value{value}, time()}; }
138137

139-
Literal Literal::String(std::string value) {
140-
return {Value{std::move(value)}, iceberg::string()};
141-
}
138+
Literal Literal::Timestamp(int64_t value) { return {Value{value}, timestamp()}; }
139+
140+
Literal Literal::TimestampTz(int64_t value) { return {Value{value}, timestamp_tz()}; }
141+
142+
Literal Literal::Float(float value) { return {Value{value}, float32()}; }
143+
144+
Literal Literal::Double(double value) { return {Value{value}, float64()}; }
145+
146+
Literal Literal::String(std::string value) { return {Value{std::move(value)}, string()}; }
142147

143148
Literal Literal::Binary(std::vector<uint8_t> value) {
144-
return {Value{std::move(value)}, iceberg::binary()};
149+
return {Value{std::move(value)}, binary()};
145150
}
146151

147152
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
@@ -188,8 +193,9 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
188193
return std::partial_ordering::unordered;
189194
}
190195

191-
// If either value is AboveMax or BelowMin, comparison is unordered
192-
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin()) {
196+
// If either value is AboveMax, BelowMin or null, comparison is unordered
197+
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin() ||
198+
IsNull() || other.IsNull()) {
193199
return std::partial_ordering::unordered;
194200
}
195201

@@ -202,13 +208,16 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
202208
return this_val ? std::partial_ordering::greater : std::partial_ordering::less;
203209
}
204210

205-
case TypeId::kInt: {
211+
case TypeId::kInt:
212+
case TypeId::kDate: {
206213
auto this_val = std::get<int32_t>(value_);
207214
auto other_val = std::get<int32_t>(other.value_);
208215
return this_val <=> other_val;
209216
}
210217

211-
case TypeId::kLong: {
218+
case TypeId::kLong:
219+
case TypeId::kTimestamp:
220+
case TypeId::kTimestampTz: {
212221
auto this_val = std::get<int64_t>(value_);
213222
auto other_val = std::get<int64_t>(other.value_);
214223
return this_val <=> other_val;
@@ -253,6 +262,9 @@ std::string Literal::ToString() const {
253262
if (std::holds_alternative<AboveMax>(value_)) {
254263
return "aboveMax";
255264
}
265+
if (std::holds_alternative<std::monostate>(value_)) {
266+
return "null";
267+
}
256268

257269
switch (type_->type_id()) {
258270
case TypeId::kBoolean: {
@@ -301,6 +313,8 @@ bool Literal::IsBelowMin() const { return std::holds_alternative<BelowMin>(value
301313

302314
bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value_); }
303315

316+
bool Literal::IsNull() const { return std::holds_alternative<std::monostate>(value_); }
317+
304318
// LiteralCaster implementation
305319

306320
Result<Literal> LiteralCaster::CastTo(const Literal& literal,
@@ -312,7 +326,8 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
312326

313327
// Handle special values
314328
if (std::holds_alternative<Literal::BelowMin>(literal.value_) ||
315-
std::holds_alternative<Literal::AboveMax>(literal.value_)) {
329+
std::holds_alternative<Literal::AboveMax>(literal.value_) ||
330+
std::holds_alternative<std::monostate>(literal.value_)) {
316331
// Cannot cast type for special values
317332
return NotSupported("Cannot cast type for {}", literal.ToString());
318333
}

src/iceberg/expression/literal.h

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace iceberg {
3232

3333
/// \brief Literal is a literal value that is associated with a primitive type.
3434
class ICEBERG_EXPORT Literal {
35-
private:
35+
public:
3636
/// \brief Sentinel value to indicate that the literal value is below the valid range
3737
/// of a specific primitive type. It can happen when casting a literal to a narrower
3838
/// primitive type.
@@ -48,27 +48,35 @@ class ICEBERG_EXPORT Literal {
4848
bool operator==(const AboveMax&) const = default;
4949
std::strong_ordering operator<=>(const AboveMax&) const = default;
5050
};
51-
52-
using Value = std::variant<bool, // for boolean
53-
int32_t, // for int, date
54-
int64_t, // for long, timestamp, timestamp_tz, time
55-
float, // for float
56-
double, // for double
57-
std::string, // for string
51+
using Value = std::variant<std::monostate, // for null
52+
bool, // for boolean
53+
int32_t, // for int, date
54+
int64_t, // for long, timestamp, timestamp_tz, time
55+
float, // for float
56+
double, // for double
57+
std::string, // for string
5858
std::vector<uint8_t>, // for binary, fixed
5959
std::array<uint8_t, 16>, // for uuid and decimal
6060
BelowMin, AboveMax>;
6161

62-
public:
6362
/// \brief Factory methods for primitive types
6463
static Literal Boolean(bool value);
6564
static Literal Int(int32_t value);
65+
static Literal Date(int32_t value);
6666
static Literal Long(int64_t value);
67+
static Literal Time(int64_t value);
68+
static Literal Timestamp(int64_t value);
69+
static Literal TimestampTz(int64_t value);
6770
static Literal Float(float value);
6871
static Literal Double(double value);
6972
static Literal String(std::string value);
7073
static Literal Binary(std::vector<uint8_t> value);
7174

75+
/// \brief Create a literal representing a null value.
76+
static Literal Null(std::shared_ptr<PrimitiveType> type) {
77+
return {Value{std::monostate{}}, std::move(type)};
78+
}
79+
7280
/// \brief Restore a literal from single-value serialization.
7381
///
7482
/// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization)
@@ -85,6 +93,9 @@ class ICEBERG_EXPORT Literal {
8593
/// \brief Get the literal type.
8694
const std::shared_ptr<PrimitiveType>& type() const;
8795

96+
/// \brief Get the literal value.
97+
const Value& value() const { return value_; }
98+
8899
/// \brief Converts this literal to a literal of the given type.
89100
///
90101
/// When a predicate is bound to a concrete data column, literals are converted to match
@@ -123,6 +134,10 @@ class ICEBERG_EXPORT Literal {
123134
/// \return true if this literal represents a BelowMin value, false otherwise
124135
bool IsBelowMin() const;
125136

137+
/// Check if this literal is null.
138+
/// \return true if this literal is null, false otherwise
139+
bool IsNull() const;
140+
126141
std::string ToString() const;
127142

128143
private:

src/iceberg/file_writer.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/file_writer.h
23+
/// Writer interface for file formats like Parquet, Avro and ORC.
24+
25+
#include <functional>
26+
#include <memory>
27+
#include <optional>
28+
29+
#include "iceberg/arrow_c_data.h"
30+
#include "iceberg/file_format.h"
31+
#include "iceberg/result.h"
32+
#include "iceberg/type_fwd.h"
33+
34+
namespace iceberg {
35+
36+
/// \brief Options for creating a writer.
37+
struct ICEBERG_EXPORT WriterOptions {
38+
/// \brief The path to the file to write.
39+
std::string path;
40+
/// \brief The schema of the data to write.
41+
ArrowSchema schema;
42+
/// \brief FileIO instance to open the file. Writer implementations should downcast it
43+
/// to the specific FileIO implementation. By default, the `iceberg-bundle` library uses
44+
/// `ArrowFileSystemFileIO`.
45+
std::shared_ptr<class FileIO> io;
46+
/// \brief Format-specific or implementation-specific properties.
47+
std::unordered_map<std::string, std::string> properties;
48+
};
49+
50+
/// \brief Base writer class to write data to different file formats.
51+
class ICEBERG_EXPORT Writer {
52+
public:
53+
virtual ~Writer() = default;
54+
Writer() = default;
55+
Writer(const Writer&) = delete;
56+
Writer& operator=(const Writer&) = delete;
57+
58+
/// \brief Open the writer.
59+
virtual Status Open(const struct WriterOptions& options) = 0;
60+
61+
/// \brief Close the writer.
62+
virtual Status Close() = 0;
63+
64+
/// \brief Write Arrow data to the file.
65+
///
66+
/// \return Status of write results.
67+
virtual Status Write(ArrowArray data) = 0;
68+
};
69+
70+
/// \brief Factory function to create a writer of a specific file format.
71+
using WriterFactory = std::function<Result<std::unique_ptr<Writer>>()>;
72+
73+
/// \brief Registry of writer factories for different file formats.
74+
struct ICEBERG_EXPORT WriterFactoryRegistry {
75+
/// \brief Register a factory function for a specific file format.
76+
WriterFactoryRegistry(FileFormatType format_type, WriterFactory factory);
77+
78+
/// \brief Get the factory function for a specific file format.
79+
static WriterFactory& GetFactory(FileFormatType format_type);
80+
81+
/// \brief Open a writer for a specific file format.
82+
static Result<std::unique_ptr<Writer>> Open(FileFormatType format_type,
83+
const WriterOptions& options);
84+
};
85+
86+
} // namespace iceberg

src/iceberg/manifest_writer.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/manifest_writer.h
23+
/// Data writer interface for manifest files.
24+
25+
#include <memory>
26+
#include <vector>
27+
28+
#include "iceberg/file_writer.h"
29+
#include "iceberg/iceberg_export.h"
30+
#include "iceberg/type_fwd.h"
31+
32+
namespace iceberg {
33+
34+
/// \brief Write manifest entries to a manifest file.
35+
class ICEBERG_EXPORT ManifestWriter {
36+
public:
37+
virtual ~ManifestWriter() = default;
38+
virtual Status WriteManifestEntries(
39+
const std::vector<ManifestEntry>& entries) const = 0;
40+
41+
/// \brief Creates a writer for a manifest file.
42+
/// \param manifest_location Path to the manifest file.
43+
/// \param file_io File IO implementation to use.
44+
/// \return A Result containing the writer or an error.
45+
static Result<std::unique_ptr<ManifestWriter>> MakeWriter(
46+
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
47+
std::shared_ptr<Schema> partition_schema);
48+
};
49+
50+
/// \brief Write manifest files to a manifest list file.
51+
class ICEBERG_EXPORT ManifestListWriter {
52+
public:
53+
virtual ~ManifestListWriter() = default;
54+
virtual Status WriteManifestFiles(const std::vector<ManifestFile>& files) const = 0;
55+
56+
/// \brief Creates a writer for the manifest list.
57+
/// \param manifest_list_location Path to the manifest list file.
58+
/// \param file_io File IO implementation to use.
59+
/// \return A Result containing the writer or an error.
60+
static Result<std::unique_ptr<ManifestListWriter>> MakeWriter(
61+
std::string_view manifest_list_location, std::shared_ptr<FileIO> file_io);
62+
};
63+
64+
} // namespace iceberg

0 commit comments

Comments
 (0)