From 67c5e89fbfeea57109ad6a216d1cbe6f4df388e0 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 4 Feb 2025 12:28:54 +0900
Subject: [PATCH 1/2] Make field repr explicit about optionality, add exception
type
---
src/iceberg/exception.h | 39 ++++++++++++++++++++++++++++++
src/iceberg/schema_field.cc | 4 ++--
src/iceberg/type.cc | 27 ++++++++++-----------
src/iceberg/type.h | 44 +++++++++++++++++++---------------
test/core/schema_field_test.cc | 4 ++--
test/core/type_test.cc | 21 ++++++++--------
6 files changed, 91 insertions(+), 48 deletions(-)
create mode 100644 src/iceberg/exception.h
diff --git a/src/iceberg/exception.h b/src/iceberg/exception.h
new file mode 100644
index 000000000..5c1436e21
--- /dev/null
+++ b/src/iceberg/exception.h
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/exception.h
+/// Common exception types for Iceberg. Note that this library primarily uses
+/// return values for error handling, not exceptions. Some operations,
+/// however, will throw exceptions in contexts where no other option is
+/// available (e.g. a constructor). In those cases, an exception type from
+/// here will be used.
+
+#include
+
+namespace iceberg {
+
+/// \brief Base exception class for exceptions thrown by the Iceberg library.
+class ICEBERG_EXPORT IcebergError : public std::runtime_error {
+ public:
+ explicit IcebergError(const std::string& what) : std::runtime_error(what) {}
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc
index 4de00b87a..7158190cc 100644
--- a/src/iceberg/schema_field.cc
+++ b/src/iceberg/schema_field.cc
@@ -52,8 +52,8 @@ const std::shared_ptr& SchemaField::type() const { return type_; }
bool SchemaField::optional() const { return optional_; }
std::string SchemaField::ToString() const {
- return std::format("{} ({}): {}{}", name_, field_id_, *type_,
- optional_ ? "" : " (required)");
+ return std::format("{} ({}): {} {}", name_, field_id_, *type_,
+ optional_ ? "(optional)" : "(required)");
}
bool SchemaField::Equals(const SchemaField& other) const {
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index 47b7e36c4..0a7ab4c05 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -23,6 +23,7 @@
#include
#include
+#include "iceberg/exception.h"
#include "iceberg/util/formatter.h"
namespace iceberg {
@@ -32,7 +33,7 @@ StructType::StructType(std::vector fields) : fields_(std::move(fiel
for (const auto& field : fields_) {
auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), index);
if (!inserted) {
- throw std::runtime_error(
+ throw IcebergError(
std::format("StructType: duplicate field ID {} (field indices {} and {})",
field.field_id(), it->second, index));
}
@@ -66,8 +67,8 @@ std::optional> StructType::GetFieldByI
}
std::optional> StructType::GetFieldByName(
std::string_view name) const {
- // TODO: what is the right behavior if there are duplicate names? (Are
- // duplicate names permitted?)
+ // N.B. duplicate names are not permitted (looking at the Java
+ // implementation) so there is nothing in particular we need to do here
for (const auto& field : fields_) {
if (field.name() == name) {
return field;
@@ -85,9 +86,8 @@ bool StructType::Equals(const Type& other) const {
ListType::ListType(SchemaField element) : element_(std::move(element)) {
if (element_.name() != kElementName) {
- throw std::runtime_error(
- std::format("ListType: child field name should be '{}', was '{}'", kElementName,
- element_.name()));
+ throw IcebergError(std::format("ListType: child field name should be '{}', was '{}'",
+ kElementName, element_.name()));
}
}
@@ -136,14 +136,12 @@ bool ListType::Equals(const Type& other) const {
MapType::MapType(SchemaField key, SchemaField value)
: fields_{std::move(key), std::move(value)} {
if (this->key().name() != kKeyName) {
- throw std::runtime_error(
- std::format("MapType: key field name should be '{}', was '{}'", kKeyName,
- this->key().name()));
+ throw IcebergError(std::format("MapType: key field name should be '{}', was '{}'",
+ kKeyName, this->key().name()));
}
if (this->value().name() != kValueName) {
- throw std::runtime_error(
- std::format("MapType: value field name should be '{}', was '{}'", kValueName,
- this->value().name()));
+ throw IcebergError(std::format("MapType: value field name should be '{}', was '{}'",
+ kValueName, this->value().name()));
}
}
@@ -226,7 +224,7 @@ bool DoubleType::Equals(const Type& other) const {
DecimalType::DecimalType(int32_t precision, int32_t scale)
: precision_(precision), scale_(scale) {
if (precision < 0 || precision > kMaxPrecision) {
- throw std::runtime_error(
+ throw IcebergError(
std::format("DecimalType: precision must be in [0, 38], was {}", precision));
}
}
@@ -287,8 +285,7 @@ bool UuidType::Equals(const Type& other) const {
FixedType::FixedType(int32_t length) : length_(length) {
if (length < 0) {
- throw std::runtime_error(
- std::format("FixedType: length must be >= 0, was {}", length));
+ throw IcebergError(std::format("FixedType: length must be >= 0, was {}", length));
}
}
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 3bff4bb12..540510628 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -41,7 +41,7 @@ namespace iceberg {
/// \brief Interface for a data type for a field.
class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
public:
- virtual ~Type() = default;
+ ~Type() override = default;
/// \brief Get the type ID.
[[nodiscard]] virtual TypeId type_id() const = 0;
@@ -79,14 +79,20 @@ class ICEBERG_EXPORT NestedType : public Type {
/// \brief Get a view of the child fields.
[[nodiscard]] virtual std::span fields() const = 0;
/// \brief Get a field by field ID.
+ ///
+ /// \note This is O(1) complexity.
[[nodiscard]] virtual std::optional>
GetFieldById(int32_t field_id) const = 0;
/// \brief Get a field by index.
+ ///
+ /// \note This is O(1) complexity.
[[nodiscard]] virtual std::optional>
GetFieldByIndex(int32_t index) const = 0;
/// \brief Get a field by name (case-sensitive). Behavior is undefined if
/// the field name is not unique; prefer GetFieldById or GetFieldByIndex
/// when possible.
+ ///
+ /// \note This is currently O(n) complexity.
[[nodiscard]] virtual std::optional>
GetFieldByName(std::string_view name) const = 0;
};
@@ -99,7 +105,7 @@ class ICEBERG_EXPORT NestedType : public Type {
class ICEBERG_EXPORT StructType : public NestedType {
public:
explicit StructType(std::vector fields);
- ~StructType() = default;
+ ~StructType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -129,7 +135,7 @@ class ICEBERG_EXPORT ListType : public NestedType {
explicit ListType(SchemaField element);
/// \brief Construct a list of the given element type.
ListType(int32_t field_id, std::shared_ptr type, bool optional);
- ~ListType() = default;
+ ~ListType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -157,7 +163,7 @@ class ICEBERG_EXPORT MapType : public NestedType {
/// \brief Construct a map of the given key/value fields. The field names
/// should be "key" and "value", respectively.
explicit MapType(SchemaField key, SchemaField value);
- ~MapType() = default;
+ ~MapType() override = default;
const SchemaField& key() const;
const SchemaField& value() const;
@@ -189,7 +195,7 @@ class ICEBERG_EXPORT MapType : public NestedType {
class ICEBERG_EXPORT BooleanType : public PrimitiveType {
public:
BooleanType() = default;
- ~BooleanType() = default;
+ ~BooleanType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -202,7 +208,7 @@ class ICEBERG_EXPORT BooleanType : public PrimitiveType {
class ICEBERG_EXPORT IntType : public PrimitiveType {
public:
IntType() = default;
- ~IntType() = default;
+ ~IntType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -215,7 +221,7 @@ class ICEBERG_EXPORT IntType : public PrimitiveType {
class ICEBERG_EXPORT LongType : public PrimitiveType {
public:
LongType() = default;
- ~LongType() = default;
+ ~LongType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -229,7 +235,7 @@ class ICEBERG_EXPORT LongType : public PrimitiveType {
class ICEBERG_EXPORT FloatType : public PrimitiveType {
public:
FloatType() = default;
- ~FloatType() = default;
+ ~FloatType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -243,7 +249,7 @@ class ICEBERG_EXPORT FloatType : public PrimitiveType {
class ICEBERG_EXPORT DoubleType : public PrimitiveType {
public:
DoubleType() = default;
- ~DoubleType() = default;
+ ~DoubleType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -259,7 +265,7 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType {
/// \brief Construct a decimal type with the given precision and scale.
DecimalType(int32_t precision, int32_t scale);
- ~DecimalType() = default;
+ ~DecimalType() override = default;
/// \brief Get the precision (the number of decimal digits).
[[nodiscard]] int32_t precision() const;
@@ -283,7 +289,7 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType {
class ICEBERG_EXPORT DateType : public PrimitiveType {
public:
DateType() = default;
- ~DateType() = default;
+ ~DateType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -297,7 +303,7 @@ class ICEBERG_EXPORT DateType : public PrimitiveType {
class ICEBERG_EXPORT TimeType : public PrimitiveType {
public:
TimeType() = default;
- ~TimeType() = default;
+ ~TimeType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -321,7 +327,7 @@ class ICEBERG_EXPORT TimestampBase : public PrimitiveType {
class ICEBERG_EXPORT TimestampType : public TimestampBase {
public:
TimestampType() = default;
- ~TimestampType() = default;
+ ~TimestampType() override = default;
bool is_zoned() const override;
TimeUnit time_unit() const override;
@@ -338,7 +344,7 @@ class ICEBERG_EXPORT TimestampType : public TimestampBase {
class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
public:
TimestampTzType() = default;
- ~TimestampTzType() = default;
+ ~TimestampTzType() override = default;
bool is_zoned() const override;
TimeUnit time_unit() const override;
@@ -354,7 +360,7 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
class ICEBERG_EXPORT BinaryType : public PrimitiveType {
public:
BinaryType() = default;
- ~BinaryType() = default;
+ ~BinaryType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -368,7 +374,7 @@ class ICEBERG_EXPORT BinaryType : public PrimitiveType {
class ICEBERG_EXPORT StringType : public PrimitiveType {
public:
StringType() = default;
- ~StringType() = default;
+ ~StringType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -381,8 +387,8 @@ class ICEBERG_EXPORT StringType : public PrimitiveType {
class ICEBERG_EXPORT FixedType : public PrimitiveType {
public:
/// \brief Construct a fixed type with the given length.
- FixedType(int32_t length);
- ~FixedType() = default;
+ explicit FixedType(int32_t length);
+ ~FixedType() override = default;
/// \brief The length (the number of bytes to store).
[[nodiscard]] int32_t length() const;
@@ -402,7 +408,7 @@ class ICEBERG_EXPORT FixedType : public PrimitiveType {
class ICEBERG_EXPORT UuidType : public PrimitiveType {
public:
UuidType() = default;
- ~UuidType() = default;
+ ~UuidType() override = default;
TypeId type_id() const override;
std::string ToString() const override;
diff --git a/test/core/schema_field_test.cc b/test/core/schema_field_test.cc
index d5fc63390..06b1df60e 100644
--- a/test/core/schema_field_test.cc
+++ b/test/core/schema_field_test.cc
@@ -44,8 +44,8 @@ TEST(SchemaFieldTest, Basics) {
EXPECT_EQ("foo bar", field.name());
EXPECT_EQ(iceberg::FixedType(10), *field.type());
EXPECT_TRUE(field.optional());
- EXPECT_EQ("foo bar (2): fixed(10)", field.ToString());
- EXPECT_EQ("foo bar (2): fixed(10)", std::format("{}", field));
+ EXPECT_EQ("foo bar (2): fixed(10) (optional)", field.ToString());
+ EXPECT_EQ("foo bar (2): fixed(10) (optional)", std::format("{}", field));
}
{
iceberg::SchemaField field = iceberg::SchemaField::MakeRequired(
diff --git a/test/core/type_test.cc b/test/core/type_test.cc
index 3a4b8d70d..0fc9a0aff 100644
--- a/test/core/type_test.cc
+++ b/test/core/type_test.cc
@@ -27,6 +27,7 @@
#include
#include
+#include "iceberg/exception.h"
#include "iceberg/util/formatter.h"
struct TypeTestCase {
@@ -209,7 +210,7 @@ const static TypeTestCase kNestedTypes[] = {
1, std::make_shared(), true),
.type_id = iceberg::TypeId::kList,
.primitive = false,
- .repr = "list",
+ .repr = "list",
},
{
.name = "list_list_int",
@@ -220,7 +221,7 @@ const static TypeTestCase kNestedTypes[] = {
false),
.type_id = iceberg::TypeId::kList,
.primitive = false,
- .repr = "list (required)>",
+ .repr = "list (required)>",
},
{
.name = "map_int_string",
@@ -245,7 +246,7 @@ const static TypeTestCase kNestedTypes[] = {
.primitive = false,
.repr = R"(struct<
foo (1): long (required)
- bar (2): string
+ bar (2): string (optional)
>)",
},
};
@@ -290,11 +291,11 @@ TEST(TypeTest, Decimal) {
ASSERT_EQ(-10, decimal.scale());
}
ASSERT_THAT([]() { iceberg::DecimalType decimal(-1, 10); },
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("precision must be in [0, 38], was -1")));
ASSERT_THAT([]() { iceberg::DecimalType decimal(39, 10); },
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("precision must be in [0, 38], was 39")));
}
@@ -312,7 +313,7 @@ TEST(TypeTest, Fixed) {
ASSERT_EQ(127, fixed.length());
}
ASSERT_THAT([]() { iceberg::FixedType decimal(-1); },
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("length must be >= 0, was -1")));
}
@@ -337,7 +338,7 @@ TEST(TypeTest, List) {
iceberg::ListType list(iceberg::SchemaField(
1, "wrongname", std::make_shared(), true));
},
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("child field name should be 'element', was 'wrongname'")));
}
@@ -369,7 +370,7 @@ TEST(TypeTest, Map) {
true);
iceberg::MapType map(key, value);
},
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("key field name should be 'key', was 'notkey'")));
ASSERT_THAT(
[]() {
@@ -378,7 +379,7 @@ TEST(TypeTest, Map) {
true);
iceberg::MapType map(key, value);
},
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("value field name should be 'value', was 'notvalue'")));
}
@@ -410,6 +411,6 @@ TEST(TypeTest, Struct) {
true);
iceberg::StructType struct_({field1, field2});
},
- ::testing::ThrowsMessage(
+ ::testing::ThrowsMessage(
::testing::HasSubstr("duplicate field ID 5")));
}
From 79c0cc0038d714e9fcad72d2b29cd3d8912fc1c8 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 4 Feb 2025 01:54:29 -0500
Subject: [PATCH 2/2] Update src/iceberg/schema_field.cc
Co-authored-by: Fokko Driesprong
---
src/iceberg/schema_field.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc
index 7158190cc..28081a71e 100644
--- a/src/iceberg/schema_field.cc
+++ b/src/iceberg/schema_field.cc
@@ -52,8 +52,8 @@ const std::shared_ptr& SchemaField::type() const { return type_; }
bool SchemaField::optional() const { return optional_; }
std::string SchemaField::ToString() const {
- return std::format("{} ({}): {} {}", name_, field_id_, *type_,
- optional_ ? "(optional)" : "(required)");
+ return std::format("{} ({}): {} ({})", name_, field_id_, *type_,
+ optional_ ? "optional" : "required");
}
bool SchemaField::Equals(const SchemaField& other) const {