From 67c5e89fbfeea57109ad6a216d1cbe6f4df388e0 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 4 Feb 2025 12:28:54 +0900 Subject: [PATCH 1/2] Make field repr explicit about optionality, add exception type --- src/iceberg/exception.h | 39 ++++++++++++++++++++++++++++++ src/iceberg/schema_field.cc | 4 ++-- src/iceberg/type.cc | 27 ++++++++++----------- src/iceberg/type.h | 44 +++++++++++++++++++--------------- test/core/schema_field_test.cc | 4 ++-- test/core/type_test.cc | 21 ++++++++-------- 6 files changed, 91 insertions(+), 48 deletions(-) create mode 100644 src/iceberg/exception.h diff --git a/src/iceberg/exception.h b/src/iceberg/exception.h new file mode 100644 index 000000000..5c1436e21 --- /dev/null +++ b/src/iceberg/exception.h @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/exception.h +/// Common exception types for Iceberg. Note that this library primarily uses +/// return values for error handling, not exceptions. Some operations, +/// however, will throw exceptions in contexts where no other option is +/// available (e.g. a constructor). In those cases, an exception type from +/// here will be used. + +#include + +namespace iceberg { + +/// \brief Base exception class for exceptions thrown by the Iceberg library. +class ICEBERG_EXPORT IcebergError : public std::runtime_error { + public: + explicit IcebergError(const std::string& what) : std::runtime_error(what) {} +}; + +} // namespace iceberg diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc index 4de00b87a..7158190cc 100644 --- a/src/iceberg/schema_field.cc +++ b/src/iceberg/schema_field.cc @@ -52,8 +52,8 @@ const std::shared_ptr& SchemaField::type() const { return type_; } bool SchemaField::optional() const { return optional_; } std::string SchemaField::ToString() const { - return std::format("{} ({}): {}{}", name_, field_id_, *type_, - optional_ ? "" : " (required)"); + return std::format("{} ({}): {} {}", name_, field_id_, *type_, + optional_ ? "(optional)" : "(required)"); } bool SchemaField::Equals(const SchemaField& other) const { diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index 47b7e36c4..0a7ab4c05 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -23,6 +23,7 @@ #include #include +#include "iceberg/exception.h" #include "iceberg/util/formatter.h" namespace iceberg { @@ -32,7 +33,7 @@ StructType::StructType(std::vector fields) : fields_(std::move(fiel for (const auto& field : fields_) { auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), index); if (!inserted) { - throw std::runtime_error( + throw IcebergError( std::format("StructType: duplicate field ID {} (field indices {} and {})", field.field_id(), it->second, index)); } @@ -66,8 +67,8 @@ std::optional> StructType::GetFieldByI } std::optional> StructType::GetFieldByName( std::string_view name) const { - // TODO: what is the right behavior if there are duplicate names? (Are - // duplicate names permitted?) + // N.B. duplicate names are not permitted (looking at the Java + // implementation) so there is nothing in particular we need to do here for (const auto& field : fields_) { if (field.name() == name) { return field; @@ -85,9 +86,8 @@ bool StructType::Equals(const Type& other) const { ListType::ListType(SchemaField element) : element_(std::move(element)) { if (element_.name() != kElementName) { - throw std::runtime_error( - std::format("ListType: child field name should be '{}', was '{}'", kElementName, - element_.name())); + throw IcebergError(std::format("ListType: child field name should be '{}', was '{}'", + kElementName, element_.name())); } } @@ -136,14 +136,12 @@ bool ListType::Equals(const Type& other) const { MapType::MapType(SchemaField key, SchemaField value) : fields_{std::move(key), std::move(value)} { if (this->key().name() != kKeyName) { - throw std::runtime_error( - std::format("MapType: key field name should be '{}', was '{}'", kKeyName, - this->key().name())); + throw IcebergError(std::format("MapType: key field name should be '{}', was '{}'", + kKeyName, this->key().name())); } if (this->value().name() != kValueName) { - throw std::runtime_error( - std::format("MapType: value field name should be '{}', was '{}'", kValueName, - this->value().name())); + throw IcebergError(std::format("MapType: value field name should be '{}', was '{}'", + kValueName, this->value().name())); } } @@ -226,7 +224,7 @@ bool DoubleType::Equals(const Type& other) const { DecimalType::DecimalType(int32_t precision, int32_t scale) : precision_(precision), scale_(scale) { if (precision < 0 || precision > kMaxPrecision) { - throw std::runtime_error( + throw IcebergError( std::format("DecimalType: precision must be in [0, 38], was {}", precision)); } } @@ -287,8 +285,7 @@ bool UuidType::Equals(const Type& other) const { FixedType::FixedType(int32_t length) : length_(length) { if (length < 0) { - throw std::runtime_error( - std::format("FixedType: length must be >= 0, was {}", length)); + throw IcebergError(std::format("FixedType: length must be >= 0, was {}", length)); } } diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 3bff4bb12..540510628 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -41,7 +41,7 @@ namespace iceberg { /// \brief Interface for a data type for a field. class ICEBERG_EXPORT Type : public iceberg::util::Formattable { public: - virtual ~Type() = default; + ~Type() override = default; /// \brief Get the type ID. [[nodiscard]] virtual TypeId type_id() const = 0; @@ -79,14 +79,20 @@ class ICEBERG_EXPORT NestedType : public Type { /// \brief Get a view of the child fields. [[nodiscard]] virtual std::span fields() const = 0; /// \brief Get a field by field ID. + /// + /// \note This is O(1) complexity. [[nodiscard]] virtual std::optional> GetFieldById(int32_t field_id) const = 0; /// \brief Get a field by index. + /// + /// \note This is O(1) complexity. [[nodiscard]] virtual std::optional> GetFieldByIndex(int32_t index) const = 0; /// \brief Get a field by name (case-sensitive). Behavior is undefined if /// the field name is not unique; prefer GetFieldById or GetFieldByIndex /// when possible. + /// + /// \note This is currently O(n) complexity. [[nodiscard]] virtual std::optional> GetFieldByName(std::string_view name) const = 0; }; @@ -99,7 +105,7 @@ class ICEBERG_EXPORT NestedType : public Type { class ICEBERG_EXPORT StructType : public NestedType { public: explicit StructType(std::vector fields); - ~StructType() = default; + ~StructType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -129,7 +135,7 @@ class ICEBERG_EXPORT ListType : public NestedType { explicit ListType(SchemaField element); /// \brief Construct a list of the given element type. ListType(int32_t field_id, std::shared_ptr type, bool optional); - ~ListType() = default; + ~ListType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -157,7 +163,7 @@ class ICEBERG_EXPORT MapType : public NestedType { /// \brief Construct a map of the given key/value fields. The field names /// should be "key" and "value", respectively. explicit MapType(SchemaField key, SchemaField value); - ~MapType() = default; + ~MapType() override = default; const SchemaField& key() const; const SchemaField& value() const; @@ -189,7 +195,7 @@ class ICEBERG_EXPORT MapType : public NestedType { class ICEBERG_EXPORT BooleanType : public PrimitiveType { public: BooleanType() = default; - ~BooleanType() = default; + ~BooleanType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -202,7 +208,7 @@ class ICEBERG_EXPORT BooleanType : public PrimitiveType { class ICEBERG_EXPORT IntType : public PrimitiveType { public: IntType() = default; - ~IntType() = default; + ~IntType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -215,7 +221,7 @@ class ICEBERG_EXPORT IntType : public PrimitiveType { class ICEBERG_EXPORT LongType : public PrimitiveType { public: LongType() = default; - ~LongType() = default; + ~LongType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -229,7 +235,7 @@ class ICEBERG_EXPORT LongType : public PrimitiveType { class ICEBERG_EXPORT FloatType : public PrimitiveType { public: FloatType() = default; - ~FloatType() = default; + ~FloatType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -243,7 +249,7 @@ class ICEBERG_EXPORT FloatType : public PrimitiveType { class ICEBERG_EXPORT DoubleType : public PrimitiveType { public: DoubleType() = default; - ~DoubleType() = default; + ~DoubleType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -259,7 +265,7 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType { /// \brief Construct a decimal type with the given precision and scale. DecimalType(int32_t precision, int32_t scale); - ~DecimalType() = default; + ~DecimalType() override = default; /// \brief Get the precision (the number of decimal digits). [[nodiscard]] int32_t precision() const; @@ -283,7 +289,7 @@ class ICEBERG_EXPORT DecimalType : public PrimitiveType { class ICEBERG_EXPORT DateType : public PrimitiveType { public: DateType() = default; - ~DateType() = default; + ~DateType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -297,7 +303,7 @@ class ICEBERG_EXPORT DateType : public PrimitiveType { class ICEBERG_EXPORT TimeType : public PrimitiveType { public: TimeType() = default; - ~TimeType() = default; + ~TimeType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -321,7 +327,7 @@ class ICEBERG_EXPORT TimestampBase : public PrimitiveType { class ICEBERG_EXPORT TimestampType : public TimestampBase { public: TimestampType() = default; - ~TimestampType() = default; + ~TimestampType() override = default; bool is_zoned() const override; TimeUnit time_unit() const override; @@ -338,7 +344,7 @@ class ICEBERG_EXPORT TimestampType : public TimestampBase { class ICEBERG_EXPORT TimestampTzType : public TimestampBase { public: TimestampTzType() = default; - ~TimestampTzType() = default; + ~TimestampTzType() override = default; bool is_zoned() const override; TimeUnit time_unit() const override; @@ -354,7 +360,7 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase { class ICEBERG_EXPORT BinaryType : public PrimitiveType { public: BinaryType() = default; - ~BinaryType() = default; + ~BinaryType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -368,7 +374,7 @@ class ICEBERG_EXPORT BinaryType : public PrimitiveType { class ICEBERG_EXPORT StringType : public PrimitiveType { public: StringType() = default; - ~StringType() = default; + ~StringType() override = default; TypeId type_id() const override; std::string ToString() const override; @@ -381,8 +387,8 @@ class ICEBERG_EXPORT StringType : public PrimitiveType { class ICEBERG_EXPORT FixedType : public PrimitiveType { public: /// \brief Construct a fixed type with the given length. - FixedType(int32_t length); - ~FixedType() = default; + explicit FixedType(int32_t length); + ~FixedType() override = default; /// \brief The length (the number of bytes to store). [[nodiscard]] int32_t length() const; @@ -402,7 +408,7 @@ class ICEBERG_EXPORT FixedType : public PrimitiveType { class ICEBERG_EXPORT UuidType : public PrimitiveType { public: UuidType() = default; - ~UuidType() = default; + ~UuidType() override = default; TypeId type_id() const override; std::string ToString() const override; diff --git a/test/core/schema_field_test.cc b/test/core/schema_field_test.cc index d5fc63390..06b1df60e 100644 --- a/test/core/schema_field_test.cc +++ b/test/core/schema_field_test.cc @@ -44,8 +44,8 @@ TEST(SchemaFieldTest, Basics) { EXPECT_EQ("foo bar", field.name()); EXPECT_EQ(iceberg::FixedType(10), *field.type()); EXPECT_TRUE(field.optional()); - EXPECT_EQ("foo bar (2): fixed(10)", field.ToString()); - EXPECT_EQ("foo bar (2): fixed(10)", std::format("{}", field)); + EXPECT_EQ("foo bar (2): fixed(10) (optional)", field.ToString()); + EXPECT_EQ("foo bar (2): fixed(10) (optional)", std::format("{}", field)); } { iceberg::SchemaField field = iceberg::SchemaField::MakeRequired( diff --git a/test/core/type_test.cc b/test/core/type_test.cc index 3a4b8d70d..0fc9a0aff 100644 --- a/test/core/type_test.cc +++ b/test/core/type_test.cc @@ -27,6 +27,7 @@ #include #include +#include "iceberg/exception.h" #include "iceberg/util/formatter.h" struct TypeTestCase { @@ -209,7 +210,7 @@ const static TypeTestCase kNestedTypes[] = { 1, std::make_shared(), true), .type_id = iceberg::TypeId::kList, .primitive = false, - .repr = "list", + .repr = "list", }, { .name = "list_list_int", @@ -220,7 +221,7 @@ const static TypeTestCase kNestedTypes[] = { false), .type_id = iceberg::TypeId::kList, .primitive = false, - .repr = "list (required)>", + .repr = "list (required)>", }, { .name = "map_int_string", @@ -245,7 +246,7 @@ const static TypeTestCase kNestedTypes[] = { .primitive = false, .repr = R"(struct< foo (1): long (required) - bar (2): string + bar (2): string (optional) >)", }, }; @@ -290,11 +291,11 @@ TEST(TypeTest, Decimal) { ASSERT_EQ(-10, decimal.scale()); } ASSERT_THAT([]() { iceberg::DecimalType decimal(-1, 10); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("precision must be in [0, 38], was -1"))); ASSERT_THAT([]() { iceberg::DecimalType decimal(39, 10); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("precision must be in [0, 38], was 39"))); } @@ -312,7 +313,7 @@ TEST(TypeTest, Fixed) { ASSERT_EQ(127, fixed.length()); } ASSERT_THAT([]() { iceberg::FixedType decimal(-1); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("length must be >= 0, was -1"))); } @@ -337,7 +338,7 @@ TEST(TypeTest, List) { iceberg::ListType list(iceberg::SchemaField( 1, "wrongname", std::make_shared(), true)); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("child field name should be 'element', was 'wrongname'"))); } @@ -369,7 +370,7 @@ TEST(TypeTest, Map) { true); iceberg::MapType map(key, value); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("key field name should be 'key', was 'notkey'"))); ASSERT_THAT( []() { @@ -378,7 +379,7 @@ TEST(TypeTest, Map) { true); iceberg::MapType map(key, value); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("value field name should be 'value', was 'notvalue'"))); } @@ -410,6 +411,6 @@ TEST(TypeTest, Struct) { true); iceberg::StructType struct_({field1, field2}); }, - ::testing::ThrowsMessage( + ::testing::ThrowsMessage( ::testing::HasSubstr("duplicate field ID 5"))); } From 79c0cc0038d714e9fcad72d2b29cd3d8912fc1c8 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 4 Feb 2025 01:54:29 -0500 Subject: [PATCH 2/2] Update src/iceberg/schema_field.cc Co-authored-by: Fokko Driesprong --- src/iceberg/schema_field.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc index 7158190cc..28081a71e 100644 --- a/src/iceberg/schema_field.cc +++ b/src/iceberg/schema_field.cc @@ -52,8 +52,8 @@ const std::shared_ptr& SchemaField::type() const { return type_; } bool SchemaField::optional() const { return optional_; } std::string SchemaField::ToString() const { - return std::format("{} ({}): {} {}", name_, field_id_, *type_, - optional_ ? "(optional)" : "(required)"); + return std::format("{} ({}): {} ({})", name_, field_id_, *type_, + optional_ ? "optional" : "required"); } bool SchemaField::Equals(const SchemaField& other) const {