diff --git a/docs/Doxyfile b/docs/Doxyfile index d576080fe..75d69394e 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -195,7 +195,7 @@ INLINE_INHERITED_MEMB = NO # shortest path that makes the file name unique will be used # The default value is: YES. -FULL_PATH_NAMES = NO +FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand @@ -207,7 +207,7 @@ FULL_PATH_NAMES = NO # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = +STRIP_FROM_PATH = ../src # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 4bebe4e4e..91706dc87 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -set(ICEBERG_SOURCES demo_table.cc) +set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc) add_iceberg_lib(iceberg SOURCES diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc new file mode 100644 index 000000000..52a57db7a --- /dev/null +++ b/src/iceberg/schema.cc @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/schema.h" + +#include + +#include "iceberg/type.h" +#include "iceberg/util/formatter.h" // IWYU pragma: keep + +namespace iceberg { + +Schema::Schema(int32_t schema_id, std::vector fields) + : StructType(std::move(fields)), schema_id_(schema_id) {} + +int32_t Schema::schema_id() const { return schema_id_; } + +std::string Schema::ToString() const { + std::string repr = "schema<\n"; + for (const auto& field : fields_) { + std::format_to(std::back_inserter(repr), " {}\n", field); + } + repr += ">"; + return repr; +} + +bool Schema::Equals(const Schema& other) const { + return schema_id_ == other.schema_id_ && fields_ == other.fields_; +} + +} // namespace iceberg diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h new file mode 100644 index 000000000..c58802d2f --- /dev/null +++ b/src/iceberg/schema.h @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/schema.h +/// Schemas for Iceberg tables. This header contains the definition of Schema +/// and any utility functions. See iceberg/type.h and iceberg/schema_field.h as well. + +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/schema_field.h" +#include "iceberg/type.h" + +namespace iceberg { + +/// \brief A schema for a Table. +/// +/// A schema is a list of typed columns, along with a unique integer ID. A +/// Table may have different schemas over its lifetime due to schema +/// evolution. +class ICEBERG_EXPORT Schema : public StructType { + public: + Schema(int32_t schema_id, std::vector fields); + + /// \brief Get the schema ID. + /// + /// A schema is identified by a unique ID for the purposes of schema + /// evolution. + [[nodiscard]] int32_t schema_id() const; + + [[nodiscard]] std::string ToString() const override; + + friend bool operator==(const Schema& lhs, const Schema& rhs) { return lhs.Equals(rhs); } + + friend bool operator!=(const Schema& lhs, const Schema& rhs) { return !(lhs == rhs); } + + private: + /// \brief Compare two schemas for equality. + [[nodiscard]] bool Equals(const Schema& other) const; + + const int32_t schema_id_; +}; + +} // namespace iceberg diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc new file mode 100644 index 000000000..4de00b87a --- /dev/null +++ b/src/iceberg/schema_field.cc @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/schema_field.h" + +#include + +#include "iceberg/type.h" +#include "iceberg/util/formatter.h" + +namespace iceberg { + +SchemaField::SchemaField(int32_t field_id, std::string name, std::shared_ptr type, + bool optional) + : field_id_(field_id), + name_(std::move(name)), + type_(std::move(type)), + optional_(optional) {} + +SchemaField SchemaField::MakeOptional(int32_t field_id, std::string name, + std::shared_ptr type) { + return SchemaField(field_id, std::move(name), std::move(type), true); +} + +SchemaField SchemaField::MakeRequired(int32_t field_id, std::string name, + std::shared_ptr type) { + return SchemaField(field_id, std::move(name), std::move(type), false); +} + +int32_t SchemaField::field_id() const { return field_id_; } + +std::string_view SchemaField::name() const { return name_; } + +const std::shared_ptr& SchemaField::type() const { return type_; } + +bool SchemaField::optional() const { return optional_; } + +std::string SchemaField::ToString() const { + return std::format("{} ({}): {}{}", name_, field_id_, *type_, + optional_ ? 
"" : " (required)"); +} + +bool SchemaField::Equals(const SchemaField& other) const { + return field_id_ == other.field_id_ && name_ == other.name_ && *type_ == *other.type_ && + optional_ == other.optional_; +} + +} // namespace iceberg diff --git a/src/iceberg/schema_field.h b/src/iceberg/schema_field.h new file mode 100644 index 000000000..e37c2d2d8 --- /dev/null +++ b/src/iceberg/schema_field.h @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/schema_field.h +/// A (schema) field is a name and a type and is part of a schema or nested +/// type (e.g. a struct). + +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief A type combined with a name. +class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable { + public: + /// \brief Construct a field. + /// \param[in] field_id The field ID. + /// \param[in] name The field name. + /// \param[in] type The field type. + /// \param[in] optional Whether values of this field are required or nullable. 
+ SchemaField(int32_t field_id, std::string name, std::shared_ptr type, + bool optional); + + /// \brief Construct an optional (nullable) field. + static SchemaField MakeOptional(int32_t field_id, std::string name, + std::shared_ptr type); + /// \brief Construct a required (non-null) field. + static SchemaField MakeRequired(int32_t field_id, std::string name, + std::shared_ptr type); + + /// \brief Get the field ID. + [[nodiscard]] int32_t field_id() const; + + /// \brief Get the field name. + [[nodiscard]] std::string_view name() const; + + /// \brief Get the field type. + [[nodiscard]] const std::shared_ptr& type() const; + + /// \brief Get whether the field is optional. + [[nodiscard]] bool optional() const; + + [[nodiscard]] std::string ToString() const override; + + friend bool operator==(const SchemaField& lhs, const SchemaField& rhs) { + return lhs.Equals(rhs); + } + + friend bool operator!=(const SchemaField& lhs, const SchemaField& rhs) { + return !(lhs == rhs); + } + + private: + /// \brief Compare two fields for equality. + [[nodiscard]] bool Equals(const SchemaField& other) const; + + int32_t field_id_; + std::string name_; + std::shared_ptr type_; + bool optional_; +}; + +} // namespace iceberg diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc new file mode 100644 index 000000000..47b7e36c4 --- /dev/null +++ b/src/iceberg/type.cc @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/type.h" + +#include +#include +#include + +#include "iceberg/util/formatter.h" + +namespace iceberg { + +StructType::StructType(std::vector fields) : fields_(std::move(fields)) { + size_t index = 0; + for (const auto& field : fields_) { + auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), index); + if (!inserted) { + throw std::runtime_error( + std::format("StructType: duplicate field ID {} (field indices {} and {})", + field.field_id(), it->second, index)); + } + + ++index; + } +} + +TypeId StructType::type_id() const { return TypeId::kStruct; } +std::string StructType::ToString() const { + std::string repr = "struct<\n"; + for (const auto& field : fields_) { + std::format_to(std::back_inserter(repr), " {}\n", field); + } + repr += ">"; + return repr; +} +std::span StructType::fields() const { return fields_; } +std::optional> StructType::GetFieldById( + int32_t field_id) const { + auto it = field_id_to_index_.find(field_id); + if (it == field_id_to_index_.end()) return std::nullopt; + return fields_[it->second]; +} +std::optional> StructType::GetFieldByIndex( + int32_t index) const { + if (index < 0 || index >= static_cast(fields_.size())) { + return std::nullopt; + } + return fields_[index]; +} +std::optional> StructType::GetFieldByName( + std::string_view name) const { + // TODO: what is the right behavior if there are duplicate names? (Are + // duplicate names permitted?) 
+ for (const auto& field : fields_) { + if (field.name() == name) { + return field; + } + } + return std::nullopt; +} +bool StructType::Equals(const Type& other) const { + if (other.type_id() != TypeId::kStruct) { + return false; + } + const auto& struct_ = static_cast(other); + return fields_ == struct_.fields_; +} + +ListType::ListType(SchemaField element) : element_(std::move(element)) { + if (element_.name() != kElementName) { + throw std::runtime_error( + std::format("ListType: child field name should be '{}', was '{}'", kElementName, + element_.name())); + } +} + +ListType::ListType(int32_t field_id, std::shared_ptr type, bool optional) + : element_(field_id, std::string(kElementName), std::move(type), optional) {} + +TypeId ListType::type_id() const { return TypeId::kList; } +std::string ListType::ToString() const { + // XXX: work around Clang/libc++: "<{}>" in a format string appears to get + // parsed as {<>} or something; split up the format string to avoid that + std::string repr = "list<"; + std::format_to(std::back_inserter(repr), "{}", element_); + repr += ">"; + return repr; +} +std::span ListType::fields() const { return {&element_, 1}; } +std::optional> ListType::GetFieldById( + int32_t field_id) const { + if (field_id == element_.field_id()) { + return std::cref(element_); + } + return std::nullopt; +} +std::optional> ListType::GetFieldByIndex( + int32_t index) const { + if (index == 0) { + return std::cref(element_); + } + return std::nullopt; +} +std::optional> ListType::GetFieldByName( + std::string_view name) const { + if (name == element_.name()) { + return std::cref(element_); + } + return std::nullopt; +} +bool ListType::Equals(const Type& other) const { + if (other.type_id() != TypeId::kList) { + return false; + } + const auto& list = static_cast(other); + return element_ == list.element_; +} + +MapType::MapType(SchemaField key, SchemaField value) + : fields_{std::move(key), std::move(value)} { + if (this->key().name() != kKeyName) { + throw
std::runtime_error( + std::format("MapType: key field name should be '{}', was '{}'", kKeyName, + this->key().name())); + } + if (this->value().name() != kValueName) { + throw std::runtime_error( + std::format("MapType: value field name should be '{}', was '{}'", kValueName, + this->value().name())); + } +} + +const SchemaField& MapType::key() const { return fields_[0]; } +const SchemaField& MapType::value() const { return fields_[1]; } +TypeId MapType::type_id() const { return TypeId::kMap; } +std::string MapType::ToString() const { + // XXX: work around Clang/libc++: "<{}>" in a format string appears to get + // parsed as {<>} or something; split up the format string to avoid that + std::string repr = "map<"; + + std::format_to(std::back_inserter(repr), "{}: {}", key(), value()); + repr += ">"; + return repr; +} +std::span MapType::fields() const { return fields_; } +std::optional> MapType::GetFieldById( + int32_t field_id) const { + if (field_id == key().field_id()) { + return key(); + } else if (field_id == value().field_id()) { + return value(); + } + return std::nullopt; +} +std::optional> MapType::GetFieldByIndex( + int32_t index) const { + if (index == 0) { + return key(); + } else if (index == 1) { + return value(); + } + return std::nullopt; +} +std::optional> MapType::GetFieldByName( + std::string_view name) const { + if (name == kKeyName) { + return key(); + } else if (name == kValueName) { + return value(); + } + return std::nullopt; +} +bool MapType::Equals(const Type& other) const { + if (other.type_id() != TypeId::kMap) { + return false; + } + const auto& map = static_cast(other); + return fields_ == map.fields_; +} + +TypeId BooleanType::type_id() const { return TypeId::kBoolean; } +std::string BooleanType::ToString() const { return "boolean"; } +bool BooleanType::Equals(const Type& other) const { + return other.type_id() == TypeId::kBoolean; +} + +TypeId IntType::type_id() const { return TypeId::kInt; } +std::string IntType::ToString() const { 
return "int"; } +bool IntType::Equals(const Type& other) const { return other.type_id() == TypeId::kInt; } + +TypeId LongType::type_id() const { return TypeId::kLong; } +std::string LongType::ToString() const { return "long"; } +bool LongType::Equals(const Type& other) const { + return other.type_id() == TypeId::kLong; +} + +TypeId FloatType::type_id() const { return TypeId::kFloat; } +std::string FloatType::ToString() const { return "float"; } +bool FloatType::Equals(const Type& other) const { + return other.type_id() == TypeId::kFloat; +} + +TypeId DoubleType::type_id() const { return TypeId::kDouble; } +std::string DoubleType::ToString() const { return "double"; } +bool DoubleType::Equals(const Type& other) const { + return other.type_id() == TypeId::kDouble; +} + +DecimalType::DecimalType(int32_t precision, int32_t scale) + : precision_(precision), scale_(scale) { + if (precision < 0 || precision > kMaxPrecision) { + throw std::runtime_error(std::format( + "DecimalType: precision must be in [0, {}], was {}", kMaxPrecision, precision)); + } +} + +int32_t DecimalType::precision() const { return precision_; } +int32_t DecimalType::scale() const { return scale_; } +TypeId DecimalType::type_id() const { return TypeId::kDecimal; } +std::string DecimalType::ToString() const { + return std::format("decimal({}, {})", precision_, scale_); +} +bool DecimalType::Equals(const Type& other) const { + if (other.type_id() != TypeId::kDecimal) { + return false; + } + const auto& decimal = static_cast(other); + return precision_ == decimal.precision_ && scale_ == decimal.scale_; +} + +TypeId DateType::type_id() const { return TypeId::kDate; } +std::string DateType::ToString() const { return "date"; } +bool DateType::Equals(const Type& other) const { + return other.type_id() == TypeId::kDate; +} + +TypeId TimeType::type_id() const { return TypeId::kTime; } +std::string TimeType::ToString() const { return "time"; } +bool TimeType::Equals(const Type& other) const { + return other.type_id() ==
TypeId::kTime; +} + +bool TimestampType::is_zoned() const { return false; } +TimeUnit TimestampType::time_unit() const { return TimeUnit::kMicrosecond; } +TypeId TimestampType::type_id() const { return TypeId::kTimestamp; } +std::string TimestampType::ToString() const { return "timestamp"; } +bool TimestampType::Equals(const Type& other) const { + return other.type_id() == TypeId::kTimestamp; +} + +bool TimestampTzType::is_zoned() const { return true; } +TimeUnit TimestampTzType::time_unit() const { return TimeUnit::kMicrosecond; } +TypeId TimestampTzType::type_id() const { return TypeId::kTimestampTz; } +std::string TimestampTzType::ToString() const { return "timestamptz"; } +bool TimestampTzType::Equals(const Type& other) const { + return other.type_id() == TypeId::kTimestampTz; +} + +TypeId StringType::type_id() const { return TypeId::kString; } +std::string StringType::ToString() const { return "string"; } +bool StringType::Equals(const Type& other) const { + return other.type_id() == TypeId::kString; +} + +TypeId UuidType::type_id() const { return TypeId::kUuid; } +std::string UuidType::ToString() const { return "uuid"; } +bool UuidType::Equals(const Type& other) const { + return other.type_id() == TypeId::kUuid; +} + +FixedType::FixedType(int32_t length) : length_(length) { + if (length < 0) { + throw std::runtime_error( + std::format("FixedType: length must be >= 0, was {}", length)); + } +} + +int32_t FixedType::length() const { return length_; } +TypeId FixedType::type_id() const { return TypeId::kFixed; } +std::string FixedType::ToString() const { return std::format("fixed({})", length_); } +bool FixedType::Equals(const Type& other) const { + if (other.type_id() != TypeId::kFixed) { + return false; + } + const auto& fixed = static_cast(other); + return length_ == fixed.length_; +} + +TypeId BinaryType::type_id() const { return TypeId::kBinary; } +std::string BinaryType::ToString() const { return "binary"; } +bool BinaryType::Equals(const Type& other) 
const { + return other.type_id() == TypeId::kBinary; +} + +} // namespace iceberg diff --git a/src/iceberg/type.h b/src/iceberg/type.h new file mode 100644 index 000000000..3bff4bb12 --- /dev/null +++ b/src/iceberg/type.h @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/type.h +/// Data types for Iceberg. This header defines the data types, but see +/// iceberg/type_fwd.h for the enum defining the list of types. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/schema_field.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Interface for a data type for a field. +class ICEBERG_EXPORT Type : public iceberg::util::Formattable { + public: + virtual ~Type() = default; + + /// \brief Get the type ID. + [[nodiscard]] virtual TypeId type_id() const = 0; + + /// \brief Is this a primitive type (may not have child fields)? + [[nodiscard]] virtual bool is_primitive() const = 0; + + /// \brief Is this a nested type (may have child fields)? + [[nodiscard]] virtual bool is_nested() const = 0; + + /// \brief Compare two types for equality. 
+ friend bool operator==(const Type& lhs, const Type& rhs) { return lhs.Equals(rhs); } + + /// \brief Compare two types for inequality. + friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == rhs); } + + protected: + /// \brief Compare two types for equality. + [[nodiscard]] virtual bool Equals(const Type& other) const = 0; +}; + +/// \brief A data type that does not have child fields. +class ICEBERG_EXPORT PrimitiveType : public Type { + public: + bool is_primitive() const override { return true; } + bool is_nested() const override { return false; } +}; + +/// \brief A data type that has child fields. +class ICEBERG_EXPORT NestedType : public Type { + public: + bool is_primitive() const override { return false; } + bool is_nested() const override { return true; } + + /// \brief Get a view of the child fields. + [[nodiscard]] virtual std::span fields() const = 0; + /// \brief Get a field by field ID. + [[nodiscard]] virtual std::optional> + GetFieldById(int32_t field_id) const = 0; + /// \brief Get a field by index. + [[nodiscard]] virtual std::optional> + GetFieldByIndex(int32_t index) const = 0; + /// \brief Get a field by name (case-sensitive). Behavior is undefined if + /// the field name is not unique; prefer GetFieldById or GetFieldByIndex + /// when possible. + [[nodiscard]] virtual std::optional> + GetFieldByName(std::string_view name) const = 0; +}; + +/// \defgroup type-nested Nested Types +/// Nested types have child fields. +/// @{ + +/// \brief A data type representing a struct with nested fields. 
+class ICEBERG_EXPORT StructType : public NestedType { + public: + explicit StructType(std::vector fields); + ~StructType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + std::span fields() const override; + std::optional> GetFieldById( + int32_t field_id) const override; + std::optional> GetFieldByIndex( + int32_t index) const override; + std::optional> GetFieldByName( + std::string_view name) const override; + + protected: + bool Equals(const Type& other) const override; + + std::vector fields_; + std::unordered_map field_id_to_index_; +}; + +/// \brief A data type representing a list of values. +class ICEBERG_EXPORT ListType : public NestedType { + public: + constexpr static const std::string_view kElementName = "element"; + + /// \brief Construct a list of the given element. The name of the child + /// field should be "element". + explicit ListType(SchemaField element); + /// \brief Construct a list of the given element type. + ListType(int32_t field_id, std::shared_ptr type, bool optional); + ~ListType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + std::span fields() const override; + std::optional> GetFieldById( + int32_t field_id) const override; + std::optional> GetFieldByIndex( + int32_t index) const override; + std::optional> GetFieldByName( + std::string_view name) const override; + + protected: + bool Equals(const Type& other) const override; + + SchemaField element_; +}; + +/// \brief A data type representing a dictionary of values. +class ICEBERG_EXPORT MapType : public NestedType { + public: + constexpr static const std::string_view kKeyName = "key"; + constexpr static const std::string_view kValueName = "value"; + + /// \brief Construct a map of the given key/value fields. The field names + /// should be "key" and "value", respectively. 
+ explicit MapType(SchemaField key, SchemaField value); + ~MapType() = default; + + const SchemaField& key() const; + const SchemaField& value() const; + + TypeId type_id() const override; + std::string ToString() const override; + + std::span fields() const override; + std::optional> GetFieldById( + int32_t field_id) const override; + std::optional> GetFieldByIndex( + int32_t index) const override; + std::optional> GetFieldByName( + std::string_view name) const override; + + protected: + bool Equals(const Type& other) const override; + + std::array fields_; +}; + +/// @} + +/// \defgroup type-primitive Primitive Types +/// Primitive types do not have nested fields. +/// @{ + +/// \brief A data type representing a boolean (true or false). +class ICEBERG_EXPORT BooleanType : public PrimitiveType { + public: + BooleanType() = default; + ~BooleanType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a 32-bit signed integer. +class ICEBERG_EXPORT IntType : public PrimitiveType { + public: + IntType() = default; + ~IntType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a 64-bit signed integer. +class ICEBERG_EXPORT LongType : public PrimitiveType { + public: + LongType() = default; + ~LongType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a 32-bit (single precision) IEEE-754 +/// float. 
+class ICEBERG_EXPORT FloatType : public PrimitiveType { + public: + FloatType() = default; + ~FloatType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a 64-bit (double precision) IEEE-754 +/// float. +class ICEBERG_EXPORT DoubleType : public PrimitiveType { + public: + DoubleType() = default; + ~DoubleType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a fixed-precision decimal. +class ICEBERG_EXPORT DecimalType : public PrimitiveType { + public: + constexpr static const int32_t kMaxPrecision = 38; + + /// \brief Construct a decimal type with the given precision and scale. + DecimalType(int32_t precision, int32_t scale); + ~DecimalType() = default; + + /// \brief Get the precision (the number of decimal digits). + [[nodiscard]] int32_t precision() const; + /// \brief Get the scale (essentially, the number of decimal digits after + /// the decimal point; precisely, the value is scaled by $$10^{-s}$$.). + [[nodiscard]] int32_t scale() const; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; + + private: + int32_t precision_; + int32_t scale_; +}; + +/// \brief A data type representing a calendar date without reference to a +/// timezone or time. +class ICEBERG_EXPORT DateType : public PrimitiveType { + public: + DateType() = default; + ~DateType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a wall clock time in microseconds without +/// reference to a timezone or date. 
+class ICEBERG_EXPORT TimeType : public PrimitiveType { + public: + TimeType() = default; + ~TimeType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A base class for any timestamp type (irrespective of unit or +/// timezone). +class ICEBERG_EXPORT TimestampBase : public PrimitiveType { + public: + /// \brief Is this type zoned or naive? + [[nodiscard]] virtual bool is_zoned() const = 0; + /// \brief The time resolution. + [[nodiscard]] virtual TimeUnit time_unit() const = 0; +}; + +/// \brief A data type representing a timestamp in microseconds without +/// reference to a timezone. +class ICEBERG_EXPORT TimestampType : public TimestampBase { + public: + TimestampType() = default; + ~TimestampType() = default; + + bool is_zoned() const override; + TimeUnit time_unit() const override; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a timestamp as microseconds since the +/// epoch in UTC. A time zone or offset is not stored. +class ICEBERG_EXPORT TimestampTzType : public TimestampBase { + public: + TimestampTzType() = default; + ~TimestampTzType() = default; + + bool is_zoned() const override; + TimeUnit time_unit() const override; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing an arbitrary-length byte sequence. +class ICEBERG_EXPORT BinaryType : public PrimitiveType { + public: + BinaryType() = default; + ~BinaryType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing an arbitrary-length character sequence +/// (encoded in UTF-8).
+class ICEBERG_EXPORT StringType : public PrimitiveType { + public: + StringType() = default; + ~StringType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// \brief A data type representing a fixed-length bytestring. +class ICEBERG_EXPORT FixedType : public PrimitiveType { + public: + /// \brief Construct a fixed type with the given length. + explicit FixedType(int32_t length); + ~FixedType() = default; + + /// \brief The length (the number of bytes to store). + [[nodiscard]] int32_t length() const; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; + + private: + int32_t length_; +}; + +/// \brief A data type representing a UUID. While defined as a distinct type, +/// it is effectively a fixed(16). +class ICEBERG_EXPORT UuidType : public PrimitiveType { + public: + UuidType() = default; + ~UuidType() = default; + + TypeId type_id() const override; + std::string ToString() const override; + + protected: + bool Equals(const Type& other) const override; +}; + +/// @} + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h new file mode 100644 index 000000000..89043938b --- /dev/null +++ b/src/iceberg/type_fwd.h @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/type_fwd.h +/// Forward declarations and enum definitions. When writing your own headers, +/// you can include this instead of the "full" headers to help reduce compile +/// times. + +namespace iceberg { + +/// \brief A data type. +/// +/// This is not a complete data type by itself because some types are nested +/// and/or parameterized. +/// +/// Iceberg V3 types are not currently supported. +enum class TypeId { + kStruct, + kList, + kMap, + kBoolean, + kInt, + kLong, + kFloat, + kDouble, + kDecimal, + kDate, + kTime, + kTimestamp, + kTimestampTz, + kString, + kUuid, + kFixed, + kBinary, +}; + +/// \brief The time unit. In Iceberg V3 nanoseconds are also supported. +enum class TimeUnit { + kMicrosecond, +}; + +class BinaryType; +class BooleanType; +class DateType; +class DecimalType; +class FixedType; +class FloatType; +class DoubleType; +class IntType; +class LongType; +class ListType; +class MapType; +class NestedType; +class PrimitiveType; +class Schema; +class SchemaField; +class StringType; +class StructType; +class TimeType; +class TimestampBase; +class TimestampType; +class TimestampTzType; +class Type; +class UuidType; + +} // namespace iceberg diff --git a/src/iceberg/util/formattable.h b/src/iceberg/util/formattable.h new file mode 100644 index 000000000..422c5a921 --- /dev/null +++ b/src/iceberg/util/formattable.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/util/formattable.h +/// Interface for objects that can be formatted via std::format. The actual +/// std::formatter specialization is in iceberg/util/formatter.h to avoid +/// bringing in <format> unnecessarily. + +#include + +#include "iceberg/iceberg_export.h" + +namespace iceberg::util { + +/// \brief Interface for objects that can be formatted via std::format. +/// +/// You must include iceberg/util/formatter.h when calling std::format. +class ICEBERG_EXPORT Formattable { + public: + virtual ~Formattable() = default; + + /// \brief Get a user-readable string representation. + virtual std::string ToString() const = 0; +}; + +} // namespace iceberg::util diff --git a/src/iceberg/util/formatter.h b/src/iceberg/util/formatter.h new file mode 100644 index 000000000..d9a67415f --- /dev/null +++ b/src/iceberg/util/formatter.h @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/util/formatter.h +/// A specialization of std::formatter for Formattable objects. This header +/// is separate from iceberg/util/formattable.h so that the latter (which is +/// meant to be included widely) does not leak <format> unnecessarily into +/// other headers. You must include this header to format a Formattable. + +#include +#include +#include + +#include "iceberg/util/formattable.h" + +/// \brief Make all classes deriving from iceberg::util::Formattable +/// formattable with std::format. +template Derived> +struct std::formatter : std::formatter { + template + auto format(const iceberg::util::Formattable& obj, FormatContext& ctx) const { + return std::formatter::format(obj.ToString(), ctx); + } +}; diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt index 551201779..a7fba1cab 100644 --- a/test/core/CMakeLists.txt +++ b/test/core/CMakeLists.txt @@ -16,7 +16,8 @@ # under the License. 
add_executable(core_unittest) -target_sources(core_unittest PRIVATE core_unittest.cc) -target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main) +target_sources(core_unittest PRIVATE core_unittest.cc schema_test.cc schema_field_test.cc + type_test.cc) +target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main GTest::gmock) target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}") add_test(NAME core_unittest COMMAND core_unittest) diff --git a/test/core/schema_field_test.cc b/test/core/schema_field_test.cc new file mode 100644 index 000000000..d5fc63390 --- /dev/null +++ b/test/core/schema_field_test.cc @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/schema_field.h" + +#include +#include + +#include + +#include "iceberg/type.h" +#include "iceberg/util/formatter.h" + +TEST(SchemaFieldTest, Basics) { + { + iceberg::SchemaField field(1, "foo", std::make_shared(), false); + EXPECT_EQ(1, field.field_id()); + EXPECT_EQ("foo", field.name()); + EXPECT_EQ(iceberg::TypeId::kInt, field.type()->type_id()); + EXPECT_FALSE(field.optional()); + EXPECT_EQ("foo (1): int (required)", field.ToString()); + EXPECT_EQ("foo (1): int (required)", std::format("{}", field)); + } + { + iceberg::SchemaField field = iceberg::SchemaField::MakeOptional( + 2, "foo bar", std::make_shared(10)); + EXPECT_EQ(2, field.field_id()); + EXPECT_EQ("foo bar", field.name()); + EXPECT_EQ(iceberg::FixedType(10), *field.type()); + EXPECT_TRUE(field.optional()); + EXPECT_EQ("foo bar (2): fixed(10)", field.ToString()); + EXPECT_EQ("foo bar (2): fixed(10)", std::format("{}", field)); + } + { + iceberg::SchemaField field = iceberg::SchemaField::MakeRequired( + 2, "foo bar", std::make_shared(10)); + EXPECT_EQ(2, field.field_id()); + EXPECT_EQ("foo bar", field.name()); + EXPECT_EQ(iceberg::FixedType(10), *field.type()); + EXPECT_FALSE(field.optional()); + EXPECT_EQ("foo bar (2): fixed(10) (required)", field.ToString()); + EXPECT_EQ("foo bar (2): fixed(10) (required)", std::format("{}", field)); + } +} + +TEST(SchemaFieldTest, Equality) { + iceberg::SchemaField field1(1, "foo", std::make_shared(), false); + iceberg::SchemaField field2(2, "foo", std::make_shared(), false); + iceberg::SchemaField field3(1, "bar", std::make_shared(), false); + iceberg::SchemaField field4(1, "foo", std::make_shared(), false); + iceberg::SchemaField field5(1, "foo", std::make_shared(), true); + iceberg::SchemaField field6(1, "foo", std::make_shared(), false); + + ASSERT_EQ(field1, field1); + ASSERT_NE(field1, field2); + ASSERT_NE(field2, field1); + ASSERT_NE(field1, field3); + ASSERT_NE(field3, field2); + ASSERT_NE(field1, field4); + ASSERT_NE(field4, field1); 
+ ASSERT_NE(field1, field5); + ASSERT_NE(field5, field1); + ASSERT_EQ(field1, field6); + ASSERT_EQ(field6, field1); +} diff --git a/test/core/schema_test.cc b/test/core/schema_test.cc new file mode 100644 index 000000000..43401947e --- /dev/null +++ b/test/core/schema_test.cc @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/schema.h" + +#include +#include + +#include +#include + +#include "iceberg/schema_field.h" +#include "iceberg/util/formatter.h" + +TEST(SchemaTest, Basics) { + { + iceberg::SchemaField field1(5, "foo", std::make_shared(), true); + iceberg::SchemaField field2(7, "bar", std::make_shared(), true); + iceberg::Schema schema(100, {field1, field2}); + ASSERT_EQ(schema, schema); + ASSERT_EQ(100, schema.schema_id()); + std::span fields = schema.fields(); + ASSERT_EQ(2, fields.size()); + ASSERT_EQ(field1, fields[0]); + ASSERT_EQ(field2, fields[1]); + ASSERT_THAT(schema.GetFieldById(5), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldById(7), ::testing::Optional(field2)); + ASSERT_THAT(schema.GetFieldByIndex(0), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldByIndex(1), ::testing::Optional(field2)); + ASSERT_THAT(schema.GetFieldByName("foo"), ::testing::Optional(field1)); + ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2)); + + ASSERT_EQ(std::nullopt, schema.GetFieldById(0)); + ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(2)); + ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(-1)); + ASSERT_EQ(std::nullopt, schema.GetFieldByName("element")); + } + ASSERT_THAT( + []() { + iceberg::SchemaField field1(5, "foo", std::make_shared(), true); + iceberg::SchemaField field2(5, "bar", std::make_shared(), + true); + iceberg::Schema schema(100, {field1, field2}); + }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("duplicate field ID 5"))); +} + +TEST(SchemaTest, Equality) { + iceberg::SchemaField field1(5, "foo", std::make_shared(), true); + iceberg::SchemaField field2(7, "bar", std::make_shared(), true); + iceberg::SchemaField field3(5, "foobar", std::make_shared(), true); + iceberg::Schema schema1(100, {field1, field2}); + iceberg::Schema schema2(101, {field1, field2}); + iceberg::Schema schema3(101, {field1}); + iceberg::Schema schema4(101, {field3, field2}); + iceberg::Schema schema5(100, {field1, field2}); + 
+ ASSERT_EQ(schema1, schema1); + ASSERT_NE(schema1, schema2); + ASSERT_NE(schema2, schema1); + ASSERT_NE(schema1, schema3); + ASSERT_NE(schema3, schema1); + ASSERT_NE(schema1, schema4); + ASSERT_NE(schema4, schema1); + ASSERT_EQ(schema1, schema5); + ASSERT_EQ(schema5, schema1); +} diff --git a/test/core/type_test.cc b/test/core/type_test.cc new file mode 100644 index 000000000..3a4b8d70d --- /dev/null +++ b/test/core/type_test.cc @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/type.h" + +#include +#include +#include +#include + +#include +#include + +#include "iceberg/util/formatter.h" + +struct TypeTestCase { + /// Test case name, must be safe for Googletest (alphanumeric + underscore) + std::string name; + std::shared_ptr type; + iceberg::TypeId type_id; + bool primitive; + std::string repr; +}; + +std::string TypeTestCaseToString(const ::testing::TestParamInfo& info) { + return info.param.name; +} + +class TypeTest : public ::testing::TestWithParam {}; + +TEST_P(TypeTest, TypeId) { + const auto& test_case = GetParam(); + ASSERT_EQ(test_case.type_id, test_case.type->type_id()); +} + +TEST_P(TypeTest, IsPrimitive) { + const auto& test_case = GetParam(); + if (test_case.primitive) { + ASSERT_TRUE(test_case.type->is_primitive()); + ASSERT_FALSE(test_case.type->is_nested()); + + const auto* primitive = + dynamic_cast(test_case.type.get()); + ASSERT_NE(nullptr, primitive); + } +} + +TEST_P(TypeTest, IsNested) { + const auto& test_case = GetParam(); + if (!test_case.primitive) { + ASSERT_FALSE(test_case.type->is_primitive()); + ASSERT_TRUE(test_case.type->is_nested()); + + const auto* nested = dynamic_cast(test_case.type.get()); + ASSERT_NE(nullptr, nested); + } +} + +TEST_P(TypeTest, ReflexiveEquality) { + const auto& test_case = GetParam(); + ASSERT_EQ(*test_case.type, *test_case.type); +} + +TEST_P(TypeTest, ToString) { + const auto& test_case = GetParam(); + ASSERT_EQ(test_case.repr, test_case.type->ToString()); +} + +TEST_P(TypeTest, StdFormat) { + const auto& test_case = GetParam(); + ASSERT_EQ(test_case.repr, std::format("{}", *test_case.type)); +} + +const static TypeTestCase kPrimitiveTypes[] = { + { + .name = "boolean", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kBoolean, + .primitive = true, + .repr = "boolean", + }, + { + .name = "int", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kInt, + .primitive = true, + .repr = "int", + }, + { + .name = "long", + .type = 
std::make_shared(), + .type_id = iceberg::TypeId::kLong, + .primitive = true, + .repr = "long", + }, + { + .name = "float", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kFloat, + .primitive = true, + .repr = "float", + }, + { + .name = "double", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kDouble, + .primitive = true, + .repr = "double", + }, + { + .name = "decimal9_2", + .type = std::make_shared(9, 2), + .type_id = iceberg::TypeId::kDecimal, + .primitive = true, + .repr = "decimal(9, 2)", + }, + { + .name = "decimal38_10", + .type = std::make_shared(38, 10), + .type_id = iceberg::TypeId::kDecimal, + .primitive = true, + .repr = "decimal(38, 10)", + }, + { + .name = "date", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kDate, + .primitive = true, + .repr = "date", + }, + { + .name = "time", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kTime, + .primitive = true, + .repr = "time", + }, + { + .name = "timestamp", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kTimestamp, + .primitive = true, + .repr = "timestamp", + }, + { + .name = "timestamptz", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kTimestampTz, + .primitive = true, + .repr = "timestamptz", + }, + { + .name = "binary", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kBinary, + .primitive = true, + .repr = "binary", + }, + { + .name = "string", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kString, + .primitive = true, + .repr = "string", + }, + { + .name = "fixed10", + .type = std::make_shared(10), + .type_id = iceberg::TypeId::kFixed, + .primitive = true, + .repr = "fixed(10)", + }, + { + .name = "fixed255", + .type = std::make_shared(255), + .type_id = iceberg::TypeId::kFixed, + .primitive = true, + .repr = "fixed(255)", + }, + { + .name = "uuid", + .type = std::make_shared(), + .type_id = iceberg::TypeId::kUuid, + .primitive = true, + .repr = "uuid", + }, +}; + +const static TypeTestCase 
kNestedTypes[] = { + { + .name = "list_int", + .type = std::make_shared( + 1, std::make_shared(), true), + .type_id = iceberg::TypeId::kList, + .primitive = false, + .repr = "list", + }, + { + .name = "list_list_int", + .type = std::make_shared( + 1, + std::make_shared(2, std::make_shared(), + true), + false), + .type_id = iceberg::TypeId::kList, + .primitive = false, + .repr = "list (required)>", + }, + { + .name = "map_int_string", + .type = std::make_shared( + iceberg::SchemaField::MakeRequired(1, "key", + std::make_shared()), + iceberg::SchemaField::MakeRequired(2, "value", + std::make_shared())), + .type_id = iceberg::TypeId::kMap, + .primitive = false, + .repr = "map", + }, + { + .name = "struct", + .type = std::make_shared(std::vector{ + iceberg::SchemaField::MakeRequired(1, "foo", + std::make_shared()), + iceberg::SchemaField::MakeOptional(2, "bar", + std::make_shared()), + }), + .type_id = iceberg::TypeId::kStruct, + .primitive = false, + .repr = R"(struct< + foo (1): long (required) + bar (2): string +>)", + }, +}; + +INSTANTIATE_TEST_SUITE_P(Primitive, TypeTest, ::testing::ValuesIn(kPrimitiveTypes), + TypeTestCaseToString); + +INSTANTIATE_TEST_SUITE_P(Nested, TypeTest, ::testing::ValuesIn(kNestedTypes), + TypeTestCaseToString); + +TEST(TypeTest, Equality) { + std::vector> alltypes; + for (const auto& test_case : kPrimitiveTypes) { + alltypes.push_back(test_case.type); + } + for (const auto& test_case : kNestedTypes) { + alltypes.push_back(test_case.type); + } + + for (size_t i = 0; i < alltypes.size(); i++) { + for (size_t j = 0; j < alltypes.size(); j++) { + SCOPED_TRACE(std::format("{} == {}", *alltypes[i], *alltypes[j])); + + if (i == j) { + ASSERT_EQ(*alltypes[i], *alltypes[j]); + } else { + ASSERT_NE(*alltypes[i], *alltypes[j]); + } + } + } +} + +TEST(TypeTest, Decimal) { + { + iceberg::DecimalType decimal(38, 2); + ASSERT_EQ(38, decimal.precision()); + ASSERT_EQ(2, decimal.scale()); + } + { + iceberg::DecimalType decimal(10, -10); + ASSERT_EQ(10, 
decimal.precision()); + ASSERT_EQ(-10, decimal.scale()); + } + ASSERT_THAT([]() { iceberg::DecimalType decimal(-1, 10); }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("precision must be in [0, 38], was -1"))); + + ASSERT_THAT([]() { iceberg::DecimalType decimal(39, 10); }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("precision must be in [0, 38], was 39"))); +} + +TEST(TypeTest, Fixed) { + { + iceberg::FixedType fixed(0); + ASSERT_EQ(0, fixed.length()); + } + { + iceberg::FixedType fixed(1); + ASSERT_EQ(1, fixed.length()); + } + { + iceberg::FixedType fixed(127); + ASSERT_EQ(127, fixed.length()); + } + ASSERT_THAT([]() { iceberg::FixedType decimal(-1); }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("length must be >= 0, was -1"))); +} + +TEST(TypeTest, List) { + { + iceberg::SchemaField field(5, "element", std::make_shared(), true); + iceberg::ListType list(field); + std::span fields = list.fields(); + ASSERT_EQ(1, fields.size()); + ASSERT_EQ(field, fields[0]); + ASSERT_THAT(list.GetFieldById(5), ::testing::Optional(field)); + ASSERT_THAT(list.GetFieldByIndex(0), ::testing::Optional(field)); + ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(field)); + + ASSERT_EQ(std::nullopt, list.GetFieldById(0)); + ASSERT_EQ(std::nullopt, list.GetFieldByIndex(1)); + ASSERT_EQ(std::nullopt, list.GetFieldByIndex(-1)); + ASSERT_EQ(std::nullopt, list.GetFieldByName("foo")); + } + ASSERT_THAT( + []() { + iceberg::ListType list(iceberg::SchemaField( + 1, "wrongname", std::make_shared(), true)); + }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("child field name should be 'element', was 'wrongname'"))); +} + +TEST(TypeTest, Map) { + { + iceberg::SchemaField key(5, "key", std::make_shared(), true); + iceberg::SchemaField value(7, "value", std::make_shared(), true); + iceberg::MapType map(key, value); + std::span fields = map.fields(); + ASSERT_EQ(2, fields.size()); + ASSERT_EQ(key, fields[0]); + ASSERT_EQ(value, fields[1]); + 
ASSERT_THAT(map.GetFieldById(5), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldById(7), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByIndex(0), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByIndex(1), ::testing::Optional(value)); + ASSERT_THAT(map.GetFieldByName("key"), ::testing::Optional(key)); + ASSERT_THAT(map.GetFieldByName("value"), ::testing::Optional(value)); + + ASSERT_EQ(std::nullopt, map.GetFieldById(0)); + ASSERT_EQ(std::nullopt, map.GetFieldByIndex(2)); + ASSERT_EQ(std::nullopt, map.GetFieldByIndex(-1)); + ASSERT_EQ(std::nullopt, map.GetFieldByName("element")); + } + ASSERT_THAT( + []() { + iceberg::SchemaField key(5, "notkey", std::make_shared(), true); + iceberg::SchemaField value(7, "value", std::make_shared(), + true); + iceberg::MapType map(key, value); + }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("key field name should be 'key', was 'notkey'"))); + ASSERT_THAT( + []() { + iceberg::SchemaField key(5, "key", std::make_shared(), true); + iceberg::SchemaField value(7, "notvalue", std::make_shared(), + true); + iceberg::MapType map(key, value); + }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("value field name should be 'value', was 'notvalue'"))); +} + +TEST(TypeTest, Struct) { + { + iceberg::SchemaField field1(5, "foo", std::make_shared(), true); + iceberg::SchemaField field2(7, "bar", std::make_shared(), true); + iceberg::StructType struct_({field1, field2}); + std::span fields = struct_.fields(); + ASSERT_EQ(2, fields.size()); + ASSERT_EQ(field1, fields[0]); + ASSERT_EQ(field2, fields[1]); + ASSERT_THAT(struct_.GetFieldById(5), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldById(7), ::testing::Optional(field2)); + ASSERT_THAT(struct_.GetFieldByIndex(0), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByIndex(1), ::testing::Optional(field2)); + ASSERT_THAT(struct_.GetFieldByName("foo"), ::testing::Optional(field1)); + ASSERT_THAT(struct_.GetFieldByName("bar"), 
::testing::Optional(field2)); + + ASSERT_EQ(std::nullopt, struct_.GetFieldById(0)); + ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(2)); + ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(-1)); + ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element")); + } + ASSERT_THAT( + []() { + iceberg::SchemaField field1(5, "foo", std::make_shared(), true); + iceberg::SchemaField field2(5, "bar", std::make_shared(), + true); + iceberg::StructType struct_({field1, field2}); + }, + ::testing::ThrowsMessage( + ::testing::HasSubstr("duplicate field ID 5"))); +}