From 3846d968dc0b3f3f79c21d3b06db56cf8b8cb267 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 15 Jan 2025 20:16:39 -0500
Subject: [PATCH 01/13] Add headers for type/field/schema
---
docs/Doxyfile | 4 +-
src/iceberg/CMakeLists.txt | 2 +-
src/iceberg/schema.cc | 47 ++++
src/iceberg/schema.h | 64 ++++++
src/iceberg/schema_field.cc | 64 ++++++
src/iceberg/schema_field.h | 87 ++++++++
src/iceberg/type.cc | 297 ++++++++++++++++++++++++
src/iceberg/type.h | 397 +++++++++++++++++++++++++++++++++
src/iceberg/type_fwd.h | 63 ++++++
src/iceberg/util/formattable.h | 44 ++++
src/iceberg/util/formatter.h | 42 ++++
11 files changed, 1108 insertions(+), 3 deletions(-)
create mode 100644 src/iceberg/schema.cc
create mode 100644 src/iceberg/schema.h
create mode 100644 src/iceberg/schema_field.cc
create mode 100644 src/iceberg/schema_field.h
create mode 100644 src/iceberg/type.cc
create mode 100644 src/iceberg/type.h
create mode 100644 src/iceberg/type_fwd.h
create mode 100644 src/iceberg/util/formattable.h
create mode 100644 src/iceberg/util/formatter.h
diff --git a/docs/Doxyfile b/docs/Doxyfile
index d576080fe..75d69394e 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -195,7 +195,7 @@ INLINE_INHERITED_MEMB = NO
# shortest path that makes the file name unique will be used
# The default value is: YES.
-FULL_PATH_NAMES = NO
+FULL_PATH_NAMES = YES
# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
# Stripping is only done if one of the specified strings matches the left-hand
@@ -207,7 +207,7 @@ FULL_PATH_NAMES = NO
# will be relative from the directory where doxygen is started.
# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-STRIP_FROM_PATH =
+STRIP_FROM_PATH = ../src
# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 4bebe4e4e..91706dc87 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-set(ICEBERG_SOURCES demo_table.cc)
+set(ICEBERG_SOURCES demo_table.cc schema.cc schema_field.cc type.cc)
add_iceberg_lib(iceberg
SOURCES
diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc
new file mode 100644
index 000000000..12874d68d
--- /dev/null
+++ b/src/iceberg/schema.cc
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/schema.h"
+
+#include
+
+#include "iceberg/type.h"
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+Schema::Schema(int32_t schema_id, std::vector fields)
+ : StructType(std::move(fields)), schema_id_(schema_id) {}
+
+int32_t Schema::schema_id() const { return schema_id_; }
+
+std::string Schema::ToString() const {
+ std::string repr = "schema<";
+ for (const auto& field : fields_) {
+ std::format_to(std::back_inserter(repr), " {}\n", field);
+ }
+ repr += ">";
+ return repr;
+}
+
+bool Schema::Equals(const Schema& other) const {
+ return schema_id_ == other.schema_id_ && fields_ == other.fields_;
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
new file mode 100644
index 000000000..43ec9e16a
--- /dev/null
+++ b/src/iceberg/schema.h
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/schema.h
+/// Schemas for Iceberg tables. This header contains the definition of Schema
+/// and any utility functions. See iceberg/type.h and iceberg/schema_field.h as well.
+
+#include
+#include
+#include
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/type.h"
+
+namespace iceberg {
+
+/// \brief A schema for a Table.
+///
+/// A schema is a list of typed columns, along with a unique integer ID. A
+/// Table may have different schemas over its lifetime due to schema
+/// evolution.
+class ICEBERG_EXPORT Schema : public StructType {
+ public:
+ Schema(int32_t schema_id, std::vector fields);
+
+ /// \brief Get the schema ID.
+ ///
+ /// Schemas are identified by a unique ID for the purposes of schema
+ /// evolution.
+ [[nodiscard]] int32_t schema_id() const;
+
+ [[nodiscard]] std::string ToString() const;
+
+ friend bool operator==(const Schema& lhs, const Schema& rhs) { return lhs.Equals(rhs); }
+
+ friend bool operator!=(const Schema& lhs, const Schema& rhs) { return !(lhs == rhs); }
+
+ private:
+ /// \brief Compare two schemas for equality.
+ [[nodiscard]] bool Equals(const Schema& other) const;
+
+ const int32_t schema_id_;
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/schema_field.cc b/src/iceberg/schema_field.cc
new file mode 100644
index 000000000..4de00b87a
--- /dev/null
+++ b/src/iceberg/schema_field.cc
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/schema_field.h"
+
+#include
+
+#include "iceberg/type.h"
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+SchemaField::SchemaField(int32_t field_id, std::string name, std::shared_ptr type,
+ bool optional)
+ : field_id_(field_id),
+ name_(std::move(name)),
+ type_(std::move(type)),
+ optional_(optional) {}
+
+SchemaField SchemaField::MakeOptional(int32_t field_id, std::string name,
+ std::shared_ptr type) {
+ return SchemaField(field_id, std::move(name), std::move(type), true);
+}
+
+SchemaField SchemaField::MakeRequired(int32_t field_id, std::string name,
+ std::shared_ptr type) {
+ return SchemaField(field_id, std::move(name), std::move(type), false);
+}
+
+int32_t SchemaField::field_id() const { return field_id_; }
+
+std::string_view SchemaField::name() const { return name_; }
+
+const std::shared_ptr& SchemaField::type() const { return type_; }
+
+bool SchemaField::optional() const { return optional_; }
+
+std::string SchemaField::ToString() const {
+ return std::format("{} ({}): {}{}", name_, field_id_, *type_,
+ optional_ ? "" : " (required)");
+}
+
+bool SchemaField::Equals(const SchemaField& other) const {
+ return field_id_ == other.field_id_ && name_ == other.name_ && *type_ == *other.type_ &&
+ optional_ == other.optional_;
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/schema_field.h b/src/iceberg/schema_field.h
new file mode 100644
index 000000000..e37c2d2d8
--- /dev/null
+++ b/src/iceberg/schema_field.h
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/schema_field.h
+/// A (schema) field is a name and a type and is part of a schema or nested
+/// type (e.g. a struct).
+
+#include
+#include
+#include
+#include
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief A type combined with a name.
+class ICEBERG_EXPORT SchemaField : public iceberg::util::Formattable {
+ public:
+ /// \brief Construct a field.
+ /// \param[in] field_id The field ID.
+ /// \param[in] name The field name.
+ /// \param[in] type The field type.
+  /// \param[in] optional True if the field is optional (nullable), false if required.
+ SchemaField(int32_t field_id, std::string name, std::shared_ptr type,
+ bool optional);
+
+ /// \brief Construct an optional (nullable) field.
+ static SchemaField MakeOptional(int32_t field_id, std::string name,
+ std::shared_ptr type);
+ /// \brief Construct a required (non-null) field.
+ static SchemaField MakeRequired(int32_t field_id, std::string name,
+ std::shared_ptr type);
+
+ /// \brief Get the field ID.
+ [[nodiscard]] int32_t field_id() const;
+
+ /// \brief Get the field name.
+ [[nodiscard]] std::string_view name() const;
+
+ /// \brief Get the field type.
+ [[nodiscard]] const std::shared_ptr& type() const;
+
+ /// \brief Get whether the field is optional.
+ [[nodiscard]] bool optional() const;
+
+ [[nodiscard]] std::string ToString() const;
+
+ friend bool operator==(const SchemaField& lhs, const SchemaField& rhs) {
+ return lhs.Equals(rhs);
+ }
+
+ friend bool operator!=(const SchemaField& lhs, const SchemaField& rhs) {
+ return !(lhs == rhs);
+ }
+
+ private:
+ /// \brief Compare two fields for equality.
+ [[nodiscard]] bool Equals(const SchemaField& other) const;
+
+ int32_t field_id_;
+ std::string name_;
+ std::shared_ptr type_;
+ bool optional_;
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
new file mode 100644
index 000000000..1132fce67
--- /dev/null
+++ b/src/iceberg/type.cc
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include
+#include
+#include
+
+#include "iceberg/util/formatter.h"
+
+namespace iceberg {
+
+TypeId BooleanType::type_id() const { return TypeId::kBoolean; }
+std::string BooleanType::ToString() const { return "boolean"; }
+bool BooleanType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kBoolean;
+}
+
+TypeId Int32Type::type_id() const { return TypeId::kInt32; }
+std::string Int32Type::ToString() const { return "int32"; }
+bool Int32Type::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kInt32;
+}
+
+TypeId Int64Type::type_id() const { return TypeId::kInt64; }
+std::string Int64Type::ToString() const { return "int64"; }
+bool Int64Type::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kInt64;
+}
+
+TypeId Float32Type::type_id() const { return TypeId::kFloat32; }
+std::string Float32Type::ToString() const { return "float32"; }
+bool Float32Type::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kFloat32;
+}
+
+TypeId Float64Type::type_id() const { return TypeId::kFloat64; }
+std::string Float64Type::ToString() const { return "float64"; }
+bool Float64Type::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kFloat64;
+}
+
+DecimalType::DecimalType(int32_t precision, int32_t scale)
+ : precision_(precision), scale_(scale) {
+ if (precision < 0 || precision > kMaxPrecision) {
+ throw std::runtime_error(
+ std::format("DecimalType: precision must be in [0, 38], was {}", precision));
+ }
+}
+
+int32_t DecimalType::precision() const { return precision_; }
+int32_t DecimalType::scale() const { return scale_; }
+TypeId DecimalType::type_id() const { return TypeId::kDecimal; }
+std::string DecimalType::ToString() const {
+ return std::format("decimal({}, {})", precision_, scale_);
+}
+bool DecimalType::Equals(const Type& other) const {
+ if (other.type_id() != TypeId::kDecimal) {
+ return false;
+ }
+ const auto& decimal = static_cast(other);
+ return precision_ == decimal.precision_ && scale_ == decimal.scale_;
+}
+
+TypeId TimeType::type_id() const { return TypeId::kTime; }
+std::string TimeType::ToString() const { return "time"; }
+bool TimeType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kTime;
+}
+
+TypeId DateType::type_id() const { return TypeId::kDate; }
+std::string DateType::ToString() const { return "date"; }
+bool DateType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kDate;
+}
+
+TypeId TimestampType::type_id() const { return TypeId::kTimestamp; }
+std::string TimestampType::ToString() const { return "timestamp"; }
+bool TimestampType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kTimestamp;
+}
+
+TypeId TimestampTzType::type_id() const { return TypeId::kTimestampTz; }
+std::string TimestampTzType::ToString() const { return "timestamptz"; }
+bool TimestampTzType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kTimestampTz;
+}
+
+TypeId BinaryType::type_id() const { return TypeId::kBinary; }
+std::string BinaryType::ToString() const { return "binary"; }
+bool BinaryType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kBinary;
+}
+
+TypeId StringType::type_id() const { return TypeId::kString; }
+std::string StringType::ToString() const { return "string"; }
+bool StringType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kString;
+}
+
+FixedType::FixedType(int32_t length) : length_(length) {
+ if (length < 0) {
+ throw std::runtime_error(
+ std::format("FixedType: length must be >= 0, was {}", length));
+ }
+}
+
+int32_t FixedType::length() const { return length_; }
+TypeId FixedType::type_id() const { return TypeId::kFixed; }
+std::string FixedType::ToString() const { return std::format("fixed({})", length_); }
+bool FixedType::Equals(const Type& other) const {
+ if (other.type_id() != TypeId::kFixed) {
+ return false;
+ }
+ const auto& fixed = static_cast(other);
+ return length_ == fixed.length_;
+}
+
+TypeId UuidType::type_id() const { return TypeId::kUuid; }
+std::string UuidType::ToString() const { return "uuid"; }
+bool UuidType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kUuid;
+}
+
+ListType::ListType(SchemaField element) : element_(std::move(element)) {
+ if (element_.name() != kElementName) {
+ throw std::runtime_error(
+ std::format("ListType: child field name should be '{}', was '{}'", kElementName,
+ element_.name()));
+ }
+}
+
+ListType::ListType(int32_t field_id, std::shared_ptr type, bool optional)
+ : element_(field_id, std::string(kElementName), std::move(type), optional) {}
+
+TypeId ListType::type_id() const { return TypeId::kList; }
+std::string ListType::ToString() const { return std::format("list<{}>", element_); }
+std::span ListType::fields() const { return {&element_, 1}; }
+// A list has one child, so the only ID that can match is the element's own.
+std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldById(
+    int32_t field_id) const {
+  const bool matches = element_.field_id() == field_id;
+  if (!matches) return std::nullopt;
+  return std::cref(element_);
+}
+// Positional lookup: the element is the list's sole child, so 0 is the only
+// index that resolves; every other index yields std::nullopt.
+std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByIndex(
+    int index) const {
+  if (index != 0) return std::nullopt;
+  return std::cref(element_);
+}
+// Name lookup: succeeds only when `name` equals the name stored on the element
+// field; every other name yields std::nullopt.
+std::optional<std::reference_wrapper<const SchemaField>> ListType::GetFieldByName(
+    std::string_view name) const {
+  if (element_.name() != name) return std::nullopt;
+  return std::cref(element_);
+}
+bool ListType::Equals(const Type& other) const {
+ if (other.type_id() != TypeId::kList) {
+ return false;
+ }
+ const auto& list = static_cast(other);
+ return element_ == list.element_;
+}
+
+MapType::MapType(SchemaField key, SchemaField value)
+ : fields_{std::move(key), std::move(value)} {
+ if (this->key().name() != kKeyName) {
+ throw std::runtime_error(
+ std::format("MapType: key field name should be '{}', was '{}'", kKeyName,
+ this->key().name()));
+ }
+ if (this->value().name() != kValueName) {
+ throw std::runtime_error(
+ std::format("MapType: value field name should be '{}', was '{}'", kValueName,
+ this->value().name()));
+ }
+}
+
+const SchemaField& MapType::key() const { return fields_[0]; }
+const SchemaField& MapType::value() const { return fields_[1]; }
+TypeId MapType::type_id() const { return TypeId::kMap; }
+std::string MapType::ToString() const {
+ return std::format("map<{}: {}>", key(), value());
+}
+std::span MapType::fields() const { return fields_; }
+// Field-ID lookup over the map's two children. fields_ holds the key at
+// position 0 and the value at position 1, so the key is tested first.
+std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldById(
+    int32_t field_id) const {
+  for (const SchemaField& child : fields_) {
+    if (child.field_id() == field_id) return child;
+  }
+  return std::nullopt;
+}
+// Positional lookup over the map's two children: index 0 is the key field and
+// index 1 is the value field. Any other index yields std::nullopt. The second
+// test must be against 1; testing 0 twice would leave the value unreachable.
+std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByIndex(
+    int index) const {
+  if (index == 0) return key();
+  if (index == 1) return value();
+  return std::nullopt;
+}
+// Name lookup against the fixed child names rather than the stored fields'
+// names: kKeyName selects the key field, kValueName selects the value field,
+// and every other name yields std::nullopt.
+std::optional<std::reference_wrapper<const SchemaField>> MapType::GetFieldByName(
+    std::string_view name) const {
+  if (name == kKeyName) return key();
+  if (name == kValueName) return value();
+  return std::nullopt;
+}
+bool MapType::Equals(const Type& other) const {
+ if (other.type_id() != TypeId::kMap) {
+ return false;
+ }
+ const auto& map = static_cast(other);
+ return fields_ == map.fields_;
+}
+
+StructType::StructType(std::vector fields) : fields_(std::move(fields)) {
+ size_t index = 0;
+ for (const auto& field : fields_) {
+ auto [it, inserted] = field_id_to_index_.try_emplace(field.field_id(), index);
+ if (!inserted) {
+ throw std::runtime_error(
+ std::format("StructType: duplicate field ID {} (field indices {} and {})",
+ field.field_id(), it->second, index));
+ }
+
+ index++;
+ }
+}
+
+TypeId StructType::type_id() const { return TypeId::kStruct; }
+std::string StructType::ToString() const {
+ std::string repr = "struct<\n";
+ for (const auto& field : fields_) {
+ std::format_to(std::back_inserter(repr), " {}\n", field);
+ }
+ repr += ">";
+ return repr;
+}
+std::span StructType::fields() const { return fields_; }
+std::optional> StructType::GetFieldById(
+ int32_t field_id) const {
+ auto it = field_id_to_index_.find(field_id);
+ if (it == field_id_to_index_.end()) return std::nullopt;
+ return fields_[it->second];
+}
+std::optional> StructType::GetFieldByIndex(
+ int index) const {
+ if (index < 0 || index >= static_cast(fields_.size())) {
+ return std::nullopt;
+ }
+ return fields_[index];
+}
+std::optional> StructType::GetFieldByName(
+ std::string_view name) const {
+ // TODO: what is the right behavior if there are duplicate names? (Are
+ // duplicate names permitted?)
+ for (const auto& field : fields_) {
+ if (field.name() == name) {
+ return field;
+ }
+ }
+ return std::nullopt;
+}
+bool StructType::Equals(const Type& other) const {
+ if (other.type_id() != TypeId::kStruct) {
+ return false;
+ }
+ const auto& struct_ = static_cast(other);
+ return fields_ == struct_.fields_;
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
new file mode 100644
index 000000000..3910b2314
--- /dev/null
+++ b/src/iceberg/type.h
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type.h
+/// Data types for Iceberg. This header defines the data types, but see
+/// iceberg/type_fwd.h for the enum defining the list of types.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formattable.h"
+
+namespace iceberg {
+
+/// \brief Interface for a data type for a field.
+class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
+ public:
+ virtual ~Type() = default;
+
+ /// \brief Get the type ID.
+ [[nodiscard]] virtual TypeId type_id() const = 0;
+
+ /// \brief Is this a primitive type (may not have child fields)?
+ [[nodiscard]] virtual bool is_primitive() const = 0;
+
+ /// \brief Is this a nested type (may have child fields)?
+ [[nodiscard]] virtual bool is_nested() const = 0;
+
+ /// \brief Compare two types for equality.
+ friend bool operator==(const Type& lhs, const Type& rhs) { return lhs.Equals(rhs); }
+
+ /// \brief Compare two types for inequality.
+ friend bool operator!=(const Type& lhs, const Type& rhs) { return !(lhs == rhs); }
+
+ protected:
+ /// \brief Compare two types for equality.
+ [[nodiscard]] virtual bool Equals(const Type& other) const = 0;
+};
+
+/// \brief A data type that may not have child fields.
+class ICEBERG_EXPORT PrimitiveType : public Type {
+ public:
+ bool is_primitive() const override { return true; }
+ bool is_nested() const override { return false; }
+};
+
+/// \brief A data type that may have child fields.
+class ICEBERG_EXPORT NestedType : public Type {
+ public:
+ bool is_primitive() const override { return false; }
+ bool is_nested() const override { return true; }
+
+ /// \brief Get a view of the child fields.
+ [[nodiscard]] virtual std::span fields() const = 0;
+ /// \brief Get a field by field ID.
+ [[nodiscard]] virtual std::optional>
+ GetFieldById(int32_t field_id) const = 0;
+ /// \brief Get a field by index.
+ [[nodiscard]] virtual std::optional>
+ GetFieldByIndex(int i) const = 0;
+ /// \brief Get a field by name.
+ [[nodiscard]] virtual std::optional>
+ GetFieldByName(std::string_view name) const = 0;
+};
+
+/// \defgroup type-primitive Primitive Types
+/// Primitive types do not have nested fields.
+/// @{
+
+/// \brief A data type representing a boolean.
+class ICEBERG_EXPORT BooleanType : public PrimitiveType {
+ public:
+ BooleanType() = default;
+ ~BooleanType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit signed integer.
+class ICEBERG_EXPORT Int32Type : public PrimitiveType {
+ public:
+ Int32Type() = default;
+ ~Int32Type() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit signed integer.
+class ICEBERG_EXPORT Int64Type : public PrimitiveType {
+ public:
+ Int64Type() = default;
+ ~Int64Type() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 32-bit (single precision) float.
+class ICEBERG_EXPORT Float32Type : public PrimitiveType {
+ public:
+ Float32Type() = default;
+ ~Float32Type() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a 64-bit (double precision) float.
+class ICEBERG_EXPORT Float64Type : public PrimitiveType {
+ public:
+ Float64Type() = default;
+ ~Float64Type() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a fixed-precision decimal.
+class ICEBERG_EXPORT DecimalType : public PrimitiveType {
+ public:
+ constexpr static const int32_t kMaxPrecision = 38;
+
+ /// \brief Construct a decimal type with the given precision and scale.
+ DecimalType(int32_t precision, int32_t scale);
+ ~DecimalType() = default;
+
+ /// \brief Get the precision (the number of decimal digits).
+ [[nodiscard]] int32_t precision() const;
+ /// \brief Get the scale (essentially, the number of decimal digits after
+  /// the decimal point; precisely, the value is scaled by $$10^{-s}$$).
+ [[nodiscard]] int32_t scale() const;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+
+ private:
+ int32_t precision_;
+ int32_t scale_;
+};
+
+/// \brief A data type representing a calendar date without reference to a
+/// timezone or time.
+class ICEBERG_EXPORT DateType : public PrimitiveType {
+ public:
+ DateType() = default;
+ ~DateType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a wall clock time in microseconds without
+/// reference to a timezone or date.
+class ICEBERG_EXPORT TimeType : public PrimitiveType {
+ public:
+ TimeType() = default;
+ ~TimeType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a timestamp in microseconds without
+/// reference to a timezone.
+class ICEBERG_EXPORT TimestampType : public PrimitiveType {
+ public:
+ TimestampType() = default;
+ ~TimestampType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a timestamp in microseconds in a
+/// particular timezone.
+class ICEBERG_EXPORT TimestampTzType : public PrimitiveType {
+ public:
+ TimestampTzType() = default;
+ ~TimestampTzType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a bytestring.
+class ICEBERG_EXPORT BinaryType : public PrimitiveType {
+ public:
+ BinaryType() = default;
+ ~BinaryType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a string.
+class ICEBERG_EXPORT StringType : public PrimitiveType {
+ public:
+ StringType() = default;
+ ~StringType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// \brief A data type representing a fixed-length bytestring.
+class ICEBERG_EXPORT FixedType : public PrimitiveType {
+ public:
+ /// \brief Construct a fixed type with the given length.
+ FixedType(int32_t length);
+ ~FixedType() = default;
+
+ /// \brief The length (the number of bytes to store).
+ [[nodiscard]] int32_t length() const;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+
+ private:
+ int32_t length_;
+};
+
+/// \brief A data type representing a UUID. While defined as a distinct type,
+/// it is effectively a fixed(16).
+class ICEBERG_EXPORT UuidType : public PrimitiveType {
+ public:
+ UuidType() = default;
+ ~UuidType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+};
+
+/// @}
+
+/// \defgroup type-nested Nested Types
+/// Nested types have nested fields.
+/// @{
+
+/// \brief A data type representing a list of values.
+class ICEBERG_EXPORT ListType : public NestedType {
+ public:
+ constexpr static const std::string_view kElementName = "element";
+
+ /// \brief Construct a list of the given element. The name of the child
+ /// field should be "element".
+ explicit ListType(SchemaField element);
+ /// \brief Construct a list of the given element type.
+ ListType(int32_t field_id, std::shared_ptr type, bool optional);
+ ~ListType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ std::span fields() const override;
+ std::optional> GetFieldById(
+ int32_t field_id) const override;
+ std::optional> GetFieldByIndex(
+ int i) const override;
+ std::optional> GetFieldByName(
+ std::string_view name) const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+
+ SchemaField element_;
+};
+
+/// \brief A data type representing a dictionary of values.
+class ICEBERG_EXPORT MapType : public NestedType {
+ public:
+ constexpr static const std::string_view kKeyName = "key";
+ constexpr static const std::string_view kValueName = "value";
+
+ /// \brief Construct a map of the given key/value fields. The field names
+ /// should be "key" and "value", respectively.
+ explicit MapType(SchemaField key, SchemaField value);
+ ~MapType() = default;
+
+ const SchemaField& key() const;
+ const SchemaField& value() const;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ std::span fields() const override;
+ std::optional> GetFieldById(
+ int32_t field_id) const override;
+ std::optional> GetFieldByIndex(
+ int i) const override;
+ std::optional> GetFieldByName(
+ std::string_view name) const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+
+ std::array fields_;
+};
+
+/// \brief A data type representing a struct with nested fields.
+class ICEBERG_EXPORT StructType : public NestedType {
+ public:
+ explicit StructType(std::vector fields);
+ ~StructType() = default;
+
+ TypeId type_id() const override;
+ std::string ToString() const override;
+
+ std::span fields() const override;
+ std::optional> GetFieldById(
+ int32_t field_id) const override;
+ std::optional> GetFieldByIndex(
+ int i) const override;
+ std::optional> GetFieldByName(
+ std::string_view name) const override;
+
+ protected:
+ bool Equals(const Type& other) const override;
+
+ std::vector fields_;
+ std::unordered_map field_id_to_index_;
+};
+
+/// @}
+
+// TODO: need to specialize std::format (ideally via a trait?)
+
+} // namespace iceberg
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
new file mode 100644
index 000000000..b05bb7d3f
--- /dev/null
+++ b/src/iceberg/type_fwd.h
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/type_fwd.h
+/// Forward declarations and enum definitions. When writing your own headers,
+/// you can include this instead of the "full" headers to help reduce compile
+/// times.
+
+namespace iceberg {
+
+/// \brief A data type.
+///
+/// This is not a complete data type by itself because some types are nested
+/// and/or parameterized.
+///
+/// Iceberg V3 types are not currently supported.
+enum class TypeId {
+ kBoolean,
+ kInt32,
+ kInt64,
+ kFloat32,
+ kFloat64,
+ kDecimal,
+ kDate,
+ kTime,
+ kTimestamp,
+ kTimestampTz,
+ kBinary,
+ kString,
+ kFixed,
+ kUuid,
+ kStruct,
+ kList,
+ kMap,
+};
+
+class BooleanType;
+class SchemaField;
+class NestedType;
+class PrimitiveType;
+class Schema;
+class StructType;
+class Type;
+
+} // namespace iceberg
diff --git a/src/iceberg/util/formattable.h b/src/iceberg/util/formattable.h
new file mode 100644
index 000000000..422c5a921
--- /dev/null
+++ b/src/iceberg/util/formattable.h
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/util/formattable.h
+/// Interface for objects that can be formatted via std::format. The actual
+/// std::formatter specialization is in iceberg/util/formatter.h to avoid
+/// bringing in <format> unnecessarily.
+
+#include
+
+#include "iceberg/iceberg_export.h"
+
+namespace iceberg::util {
+
+/// \brief Interface for objects that can be formatted via std::format.
+///
+/// You must include iceberg/util/formatter.h when calling std::format.
+class ICEBERG_EXPORT Formattable {
+ public:
+ virtual ~Formattable() = default;
+
+ /// \brief Get a user-readable string representation.
+ virtual std::string ToString() const = 0;
+};
+
+} // namespace iceberg::util
diff --git a/src/iceberg/util/formatter.h b/src/iceberg/util/formatter.h
new file mode 100644
index 000000000..42817fdc8
--- /dev/null
+++ b/src/iceberg/util/formatter.h
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/util/formatter.h
+/// A specialization of std::formatter for Formattable objects. This header
+/// is separate from iceberg/util/formattable.h so that the latter (which is
+/// meant to be included widely) does not leak <format> unnecessarily into
+/// other headers.
+
+#include
+#include
+#include
+#include
+
+#include "iceberg/util/formattable.h"
+
+/// \brief Make all classes deriving from iceberg::util::Formattable
+/// formattable with std::format.
+template Derived>
+struct std::formatter : std::formatter {
+ auto format(const iceberg::util::Formattable& obj, std::format_context& ctx) const {
+ return std::formatter::format(obj.ToString(), ctx);
+ }
+};
From ce37966edd232e6b26c4f4d11e2ec337031e0dad Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 15:18:00 +0900
Subject: [PATCH 02/13] fix clang
---
src/iceberg/type.cc | 17 +++++++++++++++--
src/iceberg/util/formatter.h | 5 +++--
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index 1132fce67..1b664b291 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -151,7 +151,14 @@ ListType::ListType(int32_t field_id, std::shared_ptr type, bool optional)
: element_(field_id, std::string(kElementName), std::move(type), optional) {}
TypeId ListType::type_id() const { return TypeId::kList; }
-std::string ListType::ToString() const { return std::format("list<{}>", element_); }
+std::string ListType::ToString() const {
+ // XXX: work around Clang/libc++: "<{}>" in a format string appears to get
+ // parsed as {<>} or something; split up the format string to avoid that
+ std::string repr = "list<";
+ std::format_to(std::back_inserter(repr), "{}", element_);
+ repr += ">";
+ return repr;
+}
std::span ListType::fields() const { return {&element_, 1}; }
std::optional> ListType::GetFieldById(
int32_t field_id) const {
@@ -200,7 +207,13 @@ const SchemaField& MapType::key() const { return fields_[0]; }
const SchemaField& MapType::value() const { return fields_[1]; }
TypeId MapType::type_id() const { return TypeId::kMap; }
std::string MapType::ToString() const {
- return std::format("map<{}: {}>", key(), value());
+ // XXX: work around Clang/libc++: "<{}>" in a format string appears to get
+ // parsed as {<>} or something; split up the format string to avoid that
+ std::string repr = "map<";
+
+ std::format_to(std::back_inserter(repr), "{}: {}", key(), value());
+ repr += "}";
+ return repr;
}
std::span MapType::fields() const { return fields_; }
std::optional> MapType::GetFieldById(
diff --git a/src/iceberg/util/formatter.h b/src/iceberg/util/formatter.h
index 42817fdc8..ed1c5cbaa 100644
--- a/src/iceberg/util/formatter.h
+++ b/src/iceberg/util/formatter.h
@@ -33,10 +33,11 @@
#include "iceberg/util/formattable.h"
/// \brief Make all classes deriving from iceberg::util::Formattable
-/// formattable with std::format.
+/// formattable with std::format.
template Derived>
struct std::formatter : std::formatter {
- auto format(const iceberg::util::Formattable& obj, std::format_context& ctx) const {
+ template
+ auto format(const iceberg::util::Formattable& obj, FormatContext& ctx) const {
return std::formatter::format(obj.ToString(), ctx);
}
};
From dd166d358d756df8d63a2a7dd6b8a2a0bb916600 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 06:25:54 -0500
Subject: [PATCH 03/13] updates
---
src/iceberg/type.cc | 8 ++++++--
src/iceberg/type.h | 32 ++++++++++++++++++++++++--------
src/iceberg/type_fwd.h | 5 +++++
3 files changed, 35 insertions(+), 10 deletions(-)
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index 1b664b291..d44d554a0 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -91,12 +91,16 @@ bool DateType::Equals(const Type& other) const {
return other.type_id() == TypeId::kDate;
}
+bool TimestampType::is_zoned() const { return false; }
+TimeUnit TimestampType::time_unit() const { return TimeUnit::kMicrosecond; }
TypeId TimestampType::type_id() const { return TypeId::kTimestamp; }
std::string TimestampType::ToString() const { return "timestamp"; }
bool TimestampType::Equals(const Type& other) const {
return other.type_id() == TypeId::kTimestamp;
}
+bool TimestampTzType::is_zoned() const { return true; }
+TimeUnit TimestampTzType::time_unit() const { return TimeUnit::kMicrosecond; }
TypeId TimestampTzType::type_id() const { return TypeId::kTimestampTz; }
std::string TimestampTzType::ToString() const { return "timestamptz"; }
bool TimestampTzType::Equals(const Type& other) const {
@@ -226,7 +230,7 @@ std::optional> MapType::GetFieldById(
return std::nullopt;
}
std::optional> MapType::GetFieldByIndex(
- int index) const {
+ int32_t index) const {
if (index == 0) {
return key();
} else if (index == 0) {
@@ -282,7 +286,7 @@ std::optional> StructType::GetFieldByI
return fields_[it->second];
}
std::optional> StructType::GetFieldByIndex(
- int index) const {
+ int32_t index) const {
if (index < 0 || index >= static_cast(fields_.size())) {
return std::nullopt;
}
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 3910b2314..aba37f72b 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -83,7 +83,7 @@ class ICEBERG_EXPORT NestedType : public Type {
GetFieldById(int32_t field_id) const = 0;
/// \brief Get a field by index.
[[nodiscard]] virtual std::optional>
- GetFieldByIndex(int i) const = 0;
+ GetFieldByIndex(int32_t index) const = 0;
/// \brief Get a field by name.
[[nodiscard]] virtual std::optional>
GetFieldByName(std::string_view name) const = 0;
@@ -212,13 +212,26 @@ class ICEBERG_EXPORT TimeType : public PrimitiveType {
bool Equals(const Type& other) const override;
};
+/// \brief A base class for any timestamp type (irrespective of unit or
+/// timezone).
+class ICEBERG_EXPORT TimestampBase : public PrimitiveType {
+ public:
+ /// \brief Is this type zoned or naive?
+ [[nodiscard]] virtual bool is_zoned() const = 0;
+ /// \brief The time resolution.
+ [[nodiscard]] virtual TimeUnit time_unit() const = 0;
+};
+
/// \brief A data type representing a timestamp in microseconds without
/// reference to a timezone.
-class ICEBERG_EXPORT TimestampType : public PrimitiveType {
+class ICEBERG_EXPORT TimestampType : public TimestampBase {
public:
TimestampType() = default;
~TimestampType() = default;
+ bool is_zoned() const override;
+ TimeUnit time_unit() const override;
+
TypeId type_id() const override;
std::string ToString() const override;
@@ -226,13 +239,16 @@ class ICEBERG_EXPORT TimestampType : public PrimitiveType {
bool Equals(const Type& other) const override;
};
-/// \brief A data type representing a timestamp in microseconds in a
-/// particular timezone.
-class ICEBERG_EXPORT TimestampTzType : public PrimitiveType {
+/// \brief A data type representing a timestamp as microseconds since the
+/// epoch in UTC.
+class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
public:
TimestampTzType() = default;
~TimestampTzType() = default;
+ bool is_zoned() const override;
+ TimeUnit time_unit() const override;
+
TypeId type_id() const override;
std::string ToString() const override;
@@ -325,7 +341,7 @@ class ICEBERG_EXPORT ListType : public NestedType {
std::optional> GetFieldById(
int32_t field_id) const override;
std::optional> GetFieldByIndex(
- int i) const override;
+ int32_t index) const override;
std::optional> GetFieldByName(
std::string_view name) const override;
@@ -356,7 +372,7 @@ class ICEBERG_EXPORT MapType : public NestedType {
std::optional> GetFieldById(
int32_t field_id) const override;
std::optional> GetFieldByIndex(
- int i) const override;
+ int32_t index) const override;
std::optional> GetFieldByName(
std::string_view name) const override;
@@ -379,7 +395,7 @@ class ICEBERG_EXPORT StructType : public NestedType {
std::optional> GetFieldById(
int32_t field_id) const override;
std::optional> GetFieldByIndex(
- int i) const override;
+ int32_t index) const override;
std::optional> GetFieldByName(
std::string_view name) const override;
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index b05bb7d3f..c70a2a7d8 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -52,6 +52,11 @@ enum class TypeId {
kMap,
};
+/// \brief The time unit. In Iceberg V3 nanoseconds are also supported.
+enum class TimeUnit {
+ kMicrosecond,
+};
+
class BooleanType;
class SchemaField;
class NestedType;
From f51e8d36ee3b24cbfcb44c42c120b99f40b46fb3 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 19:39:52 -0500
Subject: [PATCH 04/13] add some unit tests
---
src/iceberg/type.cc | 2 +-
src/iceberg/type.h | 2 -
src/iceberg/type_fwd.h | 19 ++-
test/core/CMakeLists.txt | 2 +-
test/core/type_test.cc | 256 +++++++++++++++++++++++++++++++++++++++
5 files changed, 276 insertions(+), 5 deletions(-)
create mode 100644 test/core/type_test.cc
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index d44d554a0..dc3235442 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -216,7 +216,7 @@ std::string MapType::ToString() const {
std::string repr = "map<";
std::format_to(std::back_inserter(repr), "{}: {}", key(), value());
- repr += "}";
+ repr += ">";
return repr;
}
std::span MapType::fields() const { return fields_; }
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index aba37f72b..50f3d18a9 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -408,6 +408,4 @@ class ICEBERG_EXPORT StructType : public NestedType {
/// @}
-// TODO: need to specialize std::format (ideally via a trait?)
-
} // namespace iceberg
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index c70a2a7d8..5726c8f1e 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -57,12 +57,29 @@ enum class TimeUnit {
kMicrosecond,
};
+class BinaryType;
class BooleanType;
-class SchemaField;
+class DateType;
+class DecimalType;
+class FixedType;
+class Float32Type;
+class Float64Type;
+class Int32Type;
+class Int64Type;
+class ListType;
+class MapType;
class NestedType;
class PrimitiveType;
class Schema;
+class SchemaField;
+class StringType;
+class StructType;
class StructType;
+class TimeType;
+class TimestampBase;
+class TimestampType;
+class TimestampTzType;
class Type;
+class UuidType;
} // namespace iceberg
diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt
index 551201779..787f5cbd1 100644
--- a/test/core/CMakeLists.txt
+++ b/test/core/CMakeLists.txt
@@ -16,7 +16,7 @@
# under the License.
add_executable(core_unittest)
-target_sources(core_unittest PRIVATE core_unittest.cc)
+target_sources(core_unittest PRIVATE core_unittest.cc type_test.cc)
target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main)
target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}")
add_test(NAME core_unittest COMMAND core_unittest)
diff --git a/test/core/type_test.cc b/test/core/type_test.cc
new file mode 100644
index 000000000..5bb2dfb07
--- /dev/null
+++ b/test/core/type_test.cc
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/type.h"
+
+#include
+#include
+#include
+
+#include
+
+#include "gtest/gtest.h"
+#include "iceberg/util/formatter.h"
+
+struct TypeTestCase {
+ /// Test case name, must be safe for Googletest (alphanumeric + underscore)
+ std::string name;
+ std::shared_ptr type;
+ iceberg::TypeId type_id;
+ bool primitive;
+ std::string repr;
+};
+
+std::string TypeTestCaseToString(const ::testing::TestParamInfo& info) {
+ return info.param.name;
+}
+
+class TypeTest : public ::testing::TestWithParam {};
+
+TEST_P(TypeTest, TypeId) {
+ const auto& test_case = GetParam();
+ ASSERT_EQ(test_case.type_id, test_case.type->type_id());
+}
+
+TEST_P(TypeTest, IsPrimitive) {
+ const auto& test_case = GetParam();
+ if (test_case.primitive) {
+ ASSERT_TRUE(test_case.type->is_primitive());
+ ASSERT_FALSE(test_case.type->is_nested());
+
+ const auto* primitive =
+ dynamic_cast(test_case.type.get());
+ ASSERT_NE(nullptr, primitive);
+ }
+}
+
+TEST_P(TypeTest, IsNested) {
+ const auto& test_case = GetParam();
+ if (!test_case.primitive) {
+ ASSERT_FALSE(test_case.type->is_primitive());
+ ASSERT_TRUE(test_case.type->is_nested());
+
+ const auto* nested = dynamic_cast(test_case.type.get());
+ ASSERT_NE(nullptr, nested);
+ }
+}
+
+TEST_P(TypeTest, ReflexiveEquality) {
+ const auto& test_case = GetParam();
+ ASSERT_EQ(*test_case.type, *test_case.type);
+}
+
+TEST_P(TypeTest, ToString) {
+ const auto& test_case = GetParam();
+ ASSERT_EQ(test_case.repr, test_case.type->ToString());
+}
+
+TEST_P(TypeTest, StdFormat) {
+ const auto& test_case = GetParam();
+ ASSERT_EQ(test_case.repr, std::format("{}", *test_case.type));
+}
+
+const static TypeTestCase kPrimitiveTypes[] = {
+ {
+ .name = "boolean",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kBoolean,
+ .primitive = true,
+ .repr = "boolean",
+ },
+ {
+ .name = "int32",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kInt32,
+ .primitive = true,
+ .repr = "int32",
+ },
+ {
+ .name = "int64",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kInt64,
+ .primitive = true,
+ .repr = "int64",
+ },
+ {
+ .name = "float32",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kFloat32,
+ .primitive = true,
+ .repr = "float32",
+ },
+ {
+ .name = "float64",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kFloat64,
+ .primitive = true,
+ .repr = "float64",
+ },
+ {
+ .name = "decimal9_2",
+ .type = std::make_shared(9, 2),
+ .type_id = iceberg::TypeId::kDecimal,
+ .primitive = true,
+ .repr = "decimal(9, 2)",
+ },
+ {
+ .name = "decimal38_10",
+ .type = std::make_shared(38, 10),
+ .type_id = iceberg::TypeId::kDecimal,
+ .primitive = true,
+ .repr = "decimal(38, 10)",
+ },
+ {
+ .name = "date",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kDate,
+ .primitive = true,
+ .repr = "date",
+ },
+ {
+ .name = "time",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kTime,
+ .primitive = true,
+ .repr = "time",
+ },
+ {
+ .name = "timestamp",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kTimestamp,
+ .primitive = true,
+ .repr = "timestamp",
+ },
+ {
+ .name = "timestamptz",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kTimestampTz,
+ .primitive = true,
+ .repr = "timestamptz",
+ },
+ {
+ .name = "binary",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kBinary,
+ .primitive = true,
+ .repr = "binary",
+ },
+ {
+ .name = "string",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kString,
+ .primitive = true,
+ .repr = "string",
+ },
+ {
+ .name = "fixed10",
+ .type = std::make_shared(10),
+ .type_id = iceberg::TypeId::kFixed,
+ .primitive = true,
+ .repr = "fixed(10)",
+ },
+ {
+ .name = "fixed255",
+ .type = std::make_shared(255),
+ .type_id = iceberg::TypeId::kFixed,
+ .primitive = true,
+ .repr = "fixed(255)",
+ },
+ {
+ .name = "uuid",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kUuid,
+ .primitive = true,
+ .repr = "uuid",
+ },
+};
+
+const static TypeTestCase kNestedTypes[] = {
+ {
+ .name = "list_int",
+ .type = std::make_shared(
+ 1, std::make_shared(), true),
+ .type_id = iceberg::TypeId::kList,
+ .primitive = false,
+ .repr = "list",
+ },
+ {
+ .name = "list_list_int",
+ .type = std::make_shared(
+ 1,
+ std::make_shared(2, std::make_shared(),
+ true),
+ false),
+ .type_id = iceberg::TypeId::kList,
+ .primitive = false,
+ .repr = "list (required)>",
+ },
+ {
+ .name = "map_int_string",
+ .type = std::make_shared(
+ iceberg::SchemaField::MakeRequired(1, "key",
+ std::make_shared()),
+ iceberg::SchemaField::MakeRequired(2, "value",
+ std::make_shared())),
+ .type_id = iceberg::TypeId::kMap,
+ .primitive = false,
+ .repr = "map",
+ },
+ {
+ .name = "struct",
+ .type = std::make_shared(std::vector{
+ iceberg::SchemaField::MakeRequired(1, "foo",
+ std::make_shared()),
+ iceberg::SchemaField::MakeOptional(2, "bar",
+ std::make_shared()),
+ }),
+ .type_id = iceberg::TypeId::kStruct,
+ .primitive = false,
+ .repr = R"(struct<
+ foo (1): int64 (required)
+ bar (2): string
+>)",
+ },
+};
+
+INSTANTIATE_TEST_SUITE_P(Primitive, TypeTest, ::testing::ValuesIn(kPrimitiveTypes),
+ TypeTestCaseToString);
+
+INSTANTIATE_TEST_SUITE_P(Nested, TypeTest, ::testing::ValuesIn(kNestedTypes),
+ TypeTestCaseToString);
From 91cd927fdf52f64e2bc59cab8c81a0508fc62157 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 21:13:08 -0500
Subject: [PATCH 05/13] add more unit tests
---
test/core/type_test.cc | 93 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 92 insertions(+), 1 deletion(-)
diff --git a/test/core/type_test.cc b/test/core/type_test.cc
index 5bb2dfb07..d05b76e9f 100644
--- a/test/core/type_test.cc
+++ b/test/core/type_test.cc
@@ -21,11 +21,12 @@
#include
#include
+#include
#include
+#include
#include
-#include "gtest/gtest.h"
#include "iceberg/util/formatter.h"
struct TypeTestCase {
@@ -254,3 +255,93 @@ INSTANTIATE_TEST_SUITE_P(Primitive, TypeTest, ::testing::ValuesIn(kPrimitiveType
INSTANTIATE_TEST_SUITE_P(Nested, TypeTest, ::testing::ValuesIn(kNestedTypes),
TypeTestCaseToString);
+
+TEST(TypeTest, Equality) {
+ std::vector> alltypes;
+ for (const auto& test_case : kPrimitiveTypes) {
+ alltypes.push_back(test_case.type);
+ }
+ for (const auto& test_case : kNestedTypes) {
+ alltypes.push_back(test_case.type);
+ }
+
+ for (size_t i = 0; i < alltypes.size(); i++) {
+ for (size_t j = 0; j < alltypes.size(); j++) {
+ SCOPED_TRACE(std::format("{} == {}", *alltypes[i], *alltypes[j]));
+
+ if (i == j) {
+ ASSERT_EQ(*alltypes[i], *alltypes[j]);
+ } else {
+ ASSERT_NE(*alltypes[i], *alltypes[j]);
+ }
+ }
+ }
+}
+
+TEST(TypeTest, Decimal) {
+ {
+ iceberg::DecimalType decimal(38, 2);
+ ASSERT_EQ(38, decimal.precision());
+ ASSERT_EQ(2, decimal.scale());
+ }
+ {
+ iceberg::DecimalType decimal(10, -10);
+ ASSERT_EQ(10, decimal.precision());
+ ASSERT_EQ(-10, decimal.scale());
+ }
+ ASSERT_THAT([]() { iceberg::DecimalType decimal(-1, 10); },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("precision must be in [0, 38], was -1")));
+
+ ASSERT_THAT([]() { iceberg::DecimalType decimal(39, 10); },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("precision must be in [0, 38], was 39")));
+}
+
+TEST(TypeTest, Fixed) {
+ {
+ iceberg::FixedType fixed(0);
+ ASSERT_EQ(0, fixed.length());
+ }
+ {
+ iceberg::FixedType fixed(1);
+ ASSERT_EQ(1, fixed.length());
+ }
+ {
+ iceberg::FixedType fixed(127);
+ ASSERT_EQ(127, fixed.length());
+ }
+ ASSERT_THAT([]() { iceberg::FixedType decimal(-1); },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("length must be >= 0, was -1")));
+}
+
+TEST(TypeTest, List) {
+ {
+ iceberg::SchemaField field(5, "element", std::make_shared(),
+ true);
+ iceberg::ListType list(field);
+ std::span fields = list.fields();
+ ASSERT_EQ(1, fields.size());
+ ASSERT_EQ(field, fields[0]);
+ ASSERT_THAT(list.GetFieldById(5), ::testing::Optional(field));
+ ASSERT_THAT(list.GetFieldByIndex(0), ::testing::Optional(field));
+ ASSERT_THAT(list.GetFieldByName("element"), ::testing::Optional(field));
+
+ ASSERT_EQ(std::nullopt, list.GetFieldById(0));
+ ASSERT_EQ(std::nullopt, list.GetFieldByIndex(1));
+ ASSERT_EQ(std::nullopt, list.GetFieldByIndex(-1));
+ ASSERT_EQ(std::nullopt, list.GetFieldByName("foo"));
+ }
+ ASSERT_THAT(
+ []() {
+ iceberg::ListType list(iceberg::SchemaField(
+ 1, "wrongname", std::make_shared(), true));
+ },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("child field name should be 'element', was 'wrongname'")));
+}
+
+TEST(TypeTest, Map) {}
+
+TEST(TypeTest, Struct) {}
From 85f1ade4c3142ab5bca4486bfdb9bc2f5f423327 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 21:23:51 -0500
Subject: [PATCH 06/13] add more unit tests
---
src/iceberg/type.cc | 2 +-
test/core/type_test.cc | 75 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index dc3235442..2cdeaf63f 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -233,7 +233,7 @@ std::optional> MapType::GetFieldByInde
int32_t index) const {
if (index == 0) {
return key();
- } else if (index == 0) {
+ } else if (index == 1) {
return value();
}
return std::nullopt;
diff --git a/test/core/type_test.cc b/test/core/type_test.cc
index d05b76e9f..7d7112021 100644
--- a/test/core/type_test.cc
+++ b/test/core/type_test.cc
@@ -342,6 +342,77 @@ TEST(TypeTest, List) {
::testing::HasSubstr("child field name should be 'element', was 'wrongname'")));
}
-TEST(TypeTest, Map) {}
+TEST(TypeTest, Map) {
+ {
+ iceberg::SchemaField key(5, "key", std::make_shared(), true);
+ iceberg::SchemaField value(7, "value", std::make_shared(), true);
+ iceberg::MapType map(key, value);
+ std::span fields = map.fields();
+ ASSERT_EQ(2, fields.size());
+ ASSERT_EQ(key, fields[0]);
+ ASSERT_EQ(value, fields[1]);
+ ASSERT_THAT(map.GetFieldById(5), ::testing::Optional(key));
+ ASSERT_THAT(map.GetFieldById(7), ::testing::Optional(value));
+ ASSERT_THAT(map.GetFieldByIndex(0), ::testing::Optional(key));
+ ASSERT_THAT(map.GetFieldByIndex(1), ::testing::Optional(value));
+ ASSERT_THAT(map.GetFieldByName("key"), ::testing::Optional(key));
+ ASSERT_THAT(map.GetFieldByName("value"), ::testing::Optional(value));
+
+ ASSERT_EQ(std::nullopt, map.GetFieldById(0));
+ ASSERT_EQ(std::nullopt, map.GetFieldByIndex(2));
+ ASSERT_EQ(std::nullopt, map.GetFieldByIndex(-1));
+ ASSERT_EQ(std::nullopt, map.GetFieldByName("element"));
+ }
+ ASSERT_THAT(
+ []() {
+ iceberg::SchemaField key(5, "notkey", std::make_shared(),
+ true);
+ iceberg::SchemaField value(7, "value", std::make_shared(),
+ true);
+ iceberg::MapType map(key, value);
+ },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("key field name should be 'key', was 'notkey'")));
+ ASSERT_THAT(
+ []() {
+ iceberg::SchemaField key(5, "key", std::make_shared(), true);
+ iceberg::SchemaField value(7, "notvalue", std::make_shared(),
+ true);
+ iceberg::MapType map(key, value);
+ },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("value field name should be 'value', was 'notvalue'")));
+}
-TEST(TypeTest, Struct) {}
+TEST(TypeTest, Struct) {
+ {
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
+ iceberg::SchemaField field2(7, "bar", std::make_shared(), true);
+ iceberg::StructType struct_({field1, field2});
+ std::span fields = struct_.fields();
+ ASSERT_EQ(2, fields.size());
+ ASSERT_EQ(field1, fields[0]);
+ ASSERT_EQ(field2, fields[1]);
+ ASSERT_THAT(struct_.GetFieldById(5), ::testing::Optional(field1));
+ ASSERT_THAT(struct_.GetFieldById(7), ::testing::Optional(field2));
+ ASSERT_THAT(struct_.GetFieldByIndex(0), ::testing::Optional(field1));
+ ASSERT_THAT(struct_.GetFieldByIndex(1), ::testing::Optional(field2));
+ ASSERT_THAT(struct_.GetFieldByName("foo"), ::testing::Optional(field1));
+ ASSERT_THAT(struct_.GetFieldByName("bar"), ::testing::Optional(field2));
+
+ ASSERT_EQ(std::nullopt, struct_.GetFieldById(0));
+ ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(2));
+ ASSERT_EQ(std::nullopt, struct_.GetFieldByIndex(-1));
+ ASSERT_EQ(std::nullopt, struct_.GetFieldByName("element"));
+ }
+ ASSERT_THAT(
+ []() {
+ iceberg::SchemaField field1(5, "foo", std::make_shared(),
+ true);
+ iceberg::SchemaField field2(5, "bar", std::make_shared(),
+ true);
+ iceberg::StructType struct_({field1, field2});
+ },
+ ::testing::ThrowsMessage(
+ ::testing::HasSubstr("duplicate field ID 5")));
+}
From e609d5d8d00bed1474bb5b7f4c4c0ab0d197f3fb Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 21:24:48 -0500
Subject: [PATCH 07/13] depend on gmock
---
test/core/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt
index 787f5cbd1..3261a0bee 100644
--- a/test/core/CMakeLists.txt
+++ b/test/core/CMakeLists.txt
@@ -17,6 +17,6 @@
add_executable(core_unittest)
target_sources(core_unittest PRIVATE core_unittest.cc type_test.cc)
-target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main)
+target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}")
add_test(NAME core_unittest COMMAND core_unittest)
From bbc18d50620828001ac43e3408c7fee2947ceb8d Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 22 Jan 2025 21:52:19 -0500
Subject: [PATCH 08/13] add field/schema tests
---
test/core/CMakeLists.txt | 3 +-
test/core/schema_field_test.cc | 81 ++++++++++++++++++++++++++++++++
test/core/schema_test.cc | 85 ++++++++++++++++++++++++++++++++++
3 files changed, 168 insertions(+), 1 deletion(-)
create mode 100644 test/core/schema_field_test.cc
create mode 100644 test/core/schema_test.cc
diff --git a/test/core/CMakeLists.txt b/test/core/CMakeLists.txt
index 3261a0bee..a7fba1cab 100644
--- a/test/core/CMakeLists.txt
+++ b/test/core/CMakeLists.txt
@@ -16,7 +16,8 @@
# under the License.
add_executable(core_unittest)
-target_sources(core_unittest PRIVATE core_unittest.cc type_test.cc)
+target_sources(core_unittest PRIVATE core_unittest.cc schema_test.cc schema_field_test.cc
+ type_test.cc)
target_link_libraries(core_unittest PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
target_include_directories(core_unittest PRIVATE "${ICEBERG_INCLUDES}")
add_test(NAME core_unittest COMMAND core_unittest)
diff --git a/test/core/schema_field_test.cc b/test/core/schema_field_test.cc
new file mode 100644
index 000000000..69eb253ae
--- /dev/null
+++ b/test/core/schema_field_test.cc
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/schema_field.h"
+
+#include
+#include
+
+#include
+
+#include "iceberg/type.h"
+#include "iceberg/util/formatter.h"
+
+TEST(SchemaFieldTest, Basics) {
+ {
+ iceberg::SchemaField field(1, "foo", std::make_shared(), false);
+ EXPECT_EQ(1, field.field_id());
+ EXPECT_EQ("foo", field.name());
+ EXPECT_EQ(iceberg::TypeId::kInt32, field.type()->type_id());
+ EXPECT_FALSE(field.optional());
+ EXPECT_EQ("foo (1): int32 (required)", field.ToString());
+ EXPECT_EQ("foo (1): int32 (required)", std::format("{}", field));
+ }
+ {
+ iceberg::SchemaField field = iceberg::SchemaField::MakeOptional(
+ 2, "foo bar", std::make_shared(10));  // factory call: no explicit optional flag is passed
+ EXPECT_EQ(2, field.field_id());
+ EXPECT_EQ("foo bar", field.name());
+ EXPECT_EQ(iceberg::FixedType(10), *field.type());  // compares the pointed-to type by value
+ EXPECT_TRUE(field.optional());  // MakeOptional is expected to yield an optional field
+ EXPECT_EQ("foo bar (2): fixed(10)", field.ToString());  // expected text has no "(required)" suffix
+ EXPECT_EQ("foo bar (2): fixed(10)", std::format("{}", field));  // std::format output must match ToString()
+ }
+ {
+ iceberg::SchemaField field = iceberg::SchemaField::MakeRequired(
+ 2, "foo bar", std::make_shared(10));  // same arguments as the MakeOptional case above
+ EXPECT_EQ(2, field.field_id());
+ EXPECT_EQ("foo bar", field.name());
+ EXPECT_EQ(iceberg::FixedType(10), *field.type());
+ EXPECT_FALSE(field.optional());  // MakeRequired is expected to yield a non-optional field
+ EXPECT_EQ("foo bar (2): fixed(10) (required)", field.ToString());  // required fields carry the "(required)" suffix
+ EXPECT_EQ("foo bar (2): fixed(10) (required)", std::format("{}", field));  // std::format output must match ToString()
+ }
+}
+
+TEST(SchemaFieldTest, Equality) {
+ iceberg::SchemaField field1(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field2(2, "foo", std::make_shared(), false);
+ iceberg::SchemaField field3(1, "bar", std::make_shared(), false);
+ iceberg::SchemaField field4(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field5(1, "foo", std::make_shared(), true);
+ iceberg::SchemaField field6(1, "foo", std::make_shared(), false);
+
+ ASSERT_EQ(field1, field1);
+ ASSERT_NE(field1, field2);
+ ASSERT_NE(field2, field1);  // field2 differs from field1 in field ID (2 vs 1)
+ ASSERT_NE(field1, field3);  // field3 differs from field1 in name ("bar" vs "foo")
+ ASSERT_NE(field3, field1);  // reverse direction of the check above, so operand order is covered for field3 too
+ ASSERT_NE(field1, field4);  // field4: presumably a different type than field1 -- template argument not visible here, confirm
+ ASSERT_NE(field4, field1);
+ ASSERT_NE(field1, field5);  // field5 differs from field1 in the optional flag (true vs false)
+ ASSERT_NE(field5, field1);
+ ASSERT_EQ(field1, field6);  // field6 is built from the same visible arguments as field1
+ ASSERT_EQ(field6, field1);
+}
diff --git a/test/core/schema_test.cc b/test/core/schema_test.cc
new file mode 100644
index 000000000..aedb8c081
--- /dev/null
+++ b/test/core/schema_test.cc
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/schema.h"
+
+#include
+#include
+
+#include
+#include
+
+#include "iceberg/schema_field.h"
+#include "iceberg/util/formatter.h"
+
+TEST(SchemaTest, Basics) {
+ {
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
+ iceberg::SchemaField field2(7, "bar", std::make_shared(), true);
+ iceberg::Schema schema(100, {field1, field2});
+ ASSERT_EQ(schema, schema);
+ ASSERT_EQ(100, schema.schema_id());  // 100 is the first constructor argument above
+ std::span fields = schema.fields();
+ ASSERT_EQ(2, fields.size());
+ ASSERT_EQ(field1, fields[0]);  // fields() is expected to keep construction order
+ ASSERT_EQ(field2, fields[1]);
+ ASSERT_THAT(schema.GetFieldById(5), ::testing::Optional(field1));  // lookup by field ID
+ ASSERT_THAT(schema.GetFieldById(7), ::testing::Optional(field2));
+ ASSERT_THAT(schema.GetFieldByIndex(0), ::testing::Optional(field1));  // lookup by position
+ ASSERT_THAT(schema.GetFieldByIndex(1), ::testing::Optional(field2));
+ ASSERT_THAT(schema.GetFieldByName("foo"), ::testing::Optional(field1));  // lookup by name
+ ASSERT_THAT(schema.GetFieldByName("bar"), ::testing::Optional(field2));
+
+ ASSERT_EQ(std::nullopt, schema.GetFieldById(0));  // no field was created with ID 0
+ ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(2));  // one past the last valid index
+ ASSERT_EQ(std::nullopt, schema.GetFieldByIndex(-1));  // a negative index must also yield nullopt
+ ASSERT_EQ(std::nullopt, schema.GetFieldByName("element"));  // no field was created with this name
+ }
+ ASSERT_THAT(
+ []() {
+ iceberg::SchemaField field1(5, "foo", std::make_shared(),
+ true);
+ iceberg::SchemaField field2(5, "bar", std::make_shared(),
+ true);
+ iceberg::Schema schema(100, {field1, field2});
+ },  // end of the callable handed to the matcher below
+ ::testing::ThrowsMessage(  // both fields above use ID 5, so constructing the schema is expected to throw
+ ::testing::HasSubstr("duplicate field ID 5")));  // only a substring of the exception message is pinned
+}
+
+TEST(SchemaTest, Equality) {
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
+ iceberg::SchemaField field2(7, "bar", std::make_shared(), true);
+ iceberg::SchemaField field3(5, "foobar", std::make_shared(), true);
+ iceberg::Schema schema1(100, {field1, field2});
+ iceberg::Schema schema2(101, {field1, field2});
+ iceberg::Schema schema3(101, {field1});
+ iceberg::Schema schema4(101, {field3, field2});  // differs from schema1 in schema ID and in its first field
+ iceberg::Schema schema5(100, {field1, field2});  // same schema ID and field list as schema1
+
+ ASSERT_EQ(schema1, schema1);  // a schema compares equal to itself
+ ASSERT_NE(schema1, schema2);  // schema2 differs from schema1 only in schema ID (101 vs 100)
+ ASSERT_NE(schema2, schema1);  // each inequality is checked in both operand orders
+ ASSERT_NE(schema1, schema3);  // schema3 differs in schema ID and has one field instead of two
+ ASSERT_NE(schema3, schema1);
+ ASSERT_NE(schema1, schema4);  // schema4 differs in schema ID and uses field3 in place of field1
+ ASSERT_NE(schema4, schema1);
+ ASSERT_EQ(schema1, schema5);  // separately constructed but identical arguments compare equal
+ ASSERT_EQ(schema5, schema1);
+}
From d2948b5fe5478f609ca4480d5ec324371a837176 Mon Sep 17 00:00:00 2001
From: David Li
Date: Sun, 26 Jan 2025 18:58:25 -0500
Subject: [PATCH 09/13] updates
---
src/iceberg/schema.h | 2 +-
src/iceberg/type.cc | 34 +++++++++---------
src/iceberg/type.h | 43 ++++++++++++-----------
src/iceberg/type_fwd.h | 27 +++++++--------
test/core/schema_field_test.cc | 20 +++++------
test/core/schema_test.cc | 9 +++--
test/core/type_test.cc | 63 ++++++++++++++++------------------
7 files changed, 97 insertions(+), 101 deletions(-)
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
index 43ec9e16a..c58802d2f 100644
--- a/src/iceberg/schema.h
+++ b/src/iceberg/schema.h
@@ -44,7 +44,7 @@ class ICEBERG_EXPORT Schema : public StructType {
/// \brief Get the schema ID.
///
- /// Schemas are identified by a unique ID for the purposes of schema
+ /// A schema is identified by a unique ID for the purposes of schema
/// evolution.
[[nodiscard]] int32_t schema_id() const;
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index 2cdeaf63f..6d8f89001 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -33,28 +33,26 @@ bool BooleanType::Equals(const Type& other) const {
return other.type_id() == TypeId::kBoolean;
}
-TypeId Int32Type::type_id() const { return TypeId::kInt32; }
-std::string Int32Type::ToString() const { return "int32"; }
-bool Int32Type::Equals(const Type& other) const {
- return other.type_id() == TypeId::kInt32;
-}
+TypeId IntType::type_id() const { return TypeId::kInt; }
+std::string IntType::ToString() const { return "int"; }
+bool IntType::Equals(const Type& other) const { return other.type_id() == TypeId::kInt; }
-TypeId Int64Type::type_id() const { return TypeId::kInt64; }
-std::string Int64Type::ToString() const { return "int64"; }
-bool Int64Type::Equals(const Type& other) const {
- return other.type_id() == TypeId::kInt64;
+TypeId LongType::type_id() const { return TypeId::kLong; }
+std::string LongType::ToString() const { return "long"; }
+bool LongType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kLong;
}
-TypeId Float32Type::type_id() const { return TypeId::kFloat32; }
-std::string Float32Type::ToString() const { return "float32"; }
-bool Float32Type::Equals(const Type& other) const {
- return other.type_id() == TypeId::kFloat32;
+TypeId FloatType::type_id() const { return TypeId::kFloat; }
+std::string FloatType::ToString() const { return "float"; }
+bool FloatType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kFloat;
}
-TypeId Float64Type::type_id() const { return TypeId::kFloat64; }
-std::string Float64Type::ToString() const { return "float64"; }
-bool Float64Type::Equals(const Type& other) const {
- return other.type_id() == TypeId::kFloat64;
+TypeId DoubleType::type_id() const { return TypeId::kDouble; }
+std::string DoubleType::ToString() const { return "double"; }
+bool DoubleType::Equals(const Type& other) const {
+ return other.type_id() == TypeId::kDouble;
}
DecimalType::DecimalType(int32_t precision, int32_t scale)
@@ -265,7 +263,7 @@ StructType::StructType(std::vector fields) : fields_(std::move(fiel
field.field_id(), it->second, index));
}
- index++;
+ ++index;
}
}
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 50f3d18a9..c6c2d93c3 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -63,14 +63,14 @@ class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
[[nodiscard]] virtual bool Equals(const Type& other) const = 0;
};
-/// \brief A data type that may not have child fields.
+/// \brief A data type that does not have child fields.
class ICEBERG_EXPORT PrimitiveType : public Type {
public:
bool is_primitive() const override { return true; }
bool is_nested() const override { return false; }
};
-/// \brief A data type that may have child fields.
+/// \brief A data type that has child fields.
class ICEBERG_EXPORT NestedType : public Type {
public:
bool is_primitive() const override { return false; }
@@ -93,7 +93,7 @@ class ICEBERG_EXPORT NestedType : public Type {
/// Primitive types do not have nested fields.
/// @{
-/// \brief A data type representing a boolean.
+/// \brief A data type representing a boolean (true or false).
class ICEBERG_EXPORT BooleanType : public PrimitiveType {
public:
BooleanType() = default;
@@ -107,10 +107,10 @@ class ICEBERG_EXPORT BooleanType : public PrimitiveType {
};
/// \brief A data type representing a 32-bit signed integer.
-class ICEBERG_EXPORT Int32Type : public PrimitiveType {
+class ICEBERG_EXPORT IntType : public PrimitiveType {
public:
- Int32Type() = default;
- ~Int32Type() = default;
+ IntType() = default;
+ ~IntType() = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -120,10 +120,10 @@ class ICEBERG_EXPORT Int32Type : public PrimitiveType {
};
/// \brief A data type representing a 64-bit signed integer.
-class ICEBERG_EXPORT Int64Type : public PrimitiveType {
+class ICEBERG_EXPORT LongType : public PrimitiveType {
public:
- Int64Type() = default;
- ~Int64Type() = default;
+ LongType() = default;
+ ~LongType() = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -132,11 +132,12 @@ class ICEBERG_EXPORT Int64Type : public PrimitiveType {
bool Equals(const Type& other) const override;
};
-/// \brief A data type representing a 32-bit (single precision) float.
-class ICEBERG_EXPORT Float32Type : public PrimitiveType {
+/// \brief A data type representing a 32-bit (single precision) IEEE-754
+/// float.
+class ICEBERG_EXPORT FloatType : public PrimitiveType {
public:
- Float32Type() = default;
- ~Float32Type() = default;
+ FloatType() = default;
+ ~FloatType() = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -145,11 +146,12 @@ class ICEBERG_EXPORT Float32Type : public PrimitiveType {
bool Equals(const Type& other) const override;
};
-/// \brief A data type representing a 64-bit (double precision) float.
-class ICEBERG_EXPORT Float64Type : public PrimitiveType {
+/// \brief A data type representing a 64-bit (double precision) IEEE-754
+/// float.
+class ICEBERG_EXPORT DoubleType : public PrimitiveType {
public:
- Float64Type() = default;
- ~Float64Type() = default;
+ DoubleType() = default;
+ ~DoubleType() = default;
TypeId type_id() const override;
std::string ToString() const override;
@@ -240,7 +242,7 @@ class ICEBERG_EXPORT TimestampType : public TimestampBase {
};
/// \brief A data type representing a timestamp as microseconds since the
-/// epoch in UTC.
+/// epoch in UTC. A time zone or offset is not stored.
class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
public:
TimestampTzType() = default;
@@ -256,7 +258,7 @@ class ICEBERG_EXPORT TimestampTzType : public TimestampBase {
bool Equals(const Type& other) const override;
};
-/// \brief A data type representing a bytestring.
+/// \brief A data type representing an arbitrary-length byte sequence.
class ICEBERG_EXPORT BinaryType : public PrimitiveType {
public:
BinaryType() = default;
@@ -269,7 +271,8 @@ class ICEBERG_EXPORT BinaryType : public PrimitiveType {
bool Equals(const Type& other) const override;
};
-/// \brief A data type representing a string.
+/// \brief A data type representing an arbitrary-length character sequence
+/// (encoded in UTF-8).
class ICEBERG_EXPORT StringType : public PrimitiveType {
public:
StringType() = default;
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 5726c8f1e..89043938b 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -33,23 +33,23 @@ namespace iceberg {
///
/// Iceberg V3 types are not currently supported.
enum class TypeId {
+ kStruct,
+ kList,
+ kMap,
kBoolean,
- kInt32,
- kInt64,
- kFloat32,
- kFloat64,
+ kInt,
+ kLong,
+ kFloat,
+ kDouble,
kDecimal,
kDate,
kTime,
kTimestamp,
kTimestampTz,
- kBinary,
kString,
- kFixed,
kUuid,
- kStruct,
- kList,
- kMap,
+ kFixed,
+ kBinary,
};
/// \brief The time unit. In Iceberg V3 nanoseconds are also supported.
@@ -62,10 +62,10 @@ class BooleanType;
class DateType;
class DecimalType;
class FixedType;
-class Float32Type;
-class Float64Type;
-class Int32Type;
-class Int64Type;
+class FloatType;
+class DoubleType;
+class IntType;
+class LongType;
class ListType;
class MapType;
class NestedType;
@@ -74,7 +74,6 @@ class Schema;
class SchemaField;
class StringType;
class StructType;
-class StructType;
class TimeType;
class TimestampBase;
class TimestampType;
diff --git a/test/core/schema_field_test.cc b/test/core/schema_field_test.cc
index 69eb253ae..d5fc63390 100644
--- a/test/core/schema_field_test.cc
+++ b/test/core/schema_field_test.cc
@@ -29,13 +29,13 @@
TEST(SchemaFieldTest, Basics) {
{
- iceberg::SchemaField field(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field(1, "foo", std::make_shared(), false);
EXPECT_EQ(1, field.field_id());
EXPECT_EQ("foo", field.name());
- EXPECT_EQ(iceberg::TypeId::kInt32, field.type()->type_id());
+ EXPECT_EQ(iceberg::TypeId::kInt, field.type()->type_id());
EXPECT_FALSE(field.optional());
- EXPECT_EQ("foo (1): int32 (required)", field.ToString());
- EXPECT_EQ("foo (1): int32 (required)", std::format("{}", field));
+ EXPECT_EQ("foo (1): int (required)", field.ToString());
+ EXPECT_EQ("foo (1): int (required)", std::format("{}", field));
}
{
iceberg::SchemaField field = iceberg::SchemaField::MakeOptional(
@@ -60,12 +60,12 @@ TEST(SchemaFieldTest, Basics) {
}
TEST(SchemaFieldTest, Equality) {
- iceberg::SchemaField field1(1, "foo", std::make_shared(), false);
- iceberg::SchemaField field2(2, "foo", std::make_shared(), false);
- iceberg::SchemaField field3(1, "bar", std::make_shared(), false);
- iceberg::SchemaField field4(1, "foo", std::make_shared(), false);
- iceberg::SchemaField field5(1, "foo", std::make_shared(), true);
- iceberg::SchemaField field6(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field1(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field2(2, "foo", std::make_shared(), false);
+ iceberg::SchemaField field3(1, "bar", std::make_shared(), false);
+ iceberg::SchemaField field4(1, "foo", std::make_shared(), false);
+ iceberg::SchemaField field5(1, "foo", std::make_shared(), true);
+ iceberg::SchemaField field6(1, "foo", std::make_shared(), false);
ASSERT_EQ(field1, field1);
ASSERT_NE(field1, field2);
diff --git a/test/core/schema_test.cc b/test/core/schema_test.cc
index aedb8c081..43401947e 100644
--- a/test/core/schema_test.cc
+++ b/test/core/schema_test.cc
@@ -30,7 +30,7 @@
TEST(SchemaTest, Basics) {
{
- iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
iceberg::SchemaField field2(7, "bar", std::make_shared(), true);
iceberg::Schema schema(100, {field1, field2});
ASSERT_EQ(schema, schema);
@@ -53,8 +53,7 @@ TEST(SchemaTest, Basics) {
}
ASSERT_THAT(
[]() {
- iceberg::SchemaField field1(5, "foo", std::make_shared(),
- true);
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
iceberg::SchemaField field2(5, "bar", std::make_shared(),
true);
iceberg::Schema schema(100, {field1, field2});
@@ -64,9 +63,9 @@ TEST(SchemaTest, Basics) {
}
TEST(SchemaTest, Equality) {
- iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
+ iceberg::SchemaField field1(5, "foo", std::make_shared(), true);
iceberg::SchemaField field2(7, "bar", std::make_shared(), true);
- iceberg::SchemaField field3(5, "foobar", std::make_shared(), true);
+ iceberg::SchemaField field3(5, "foobar", std::make_shared(), true);
iceberg::Schema schema1(100, {field1, field2});
iceberg::Schema schema2(101, {field1, field2});
iceberg::Schema schema3(101, {field1});
diff --git a/test/core/type_test.cc b/test/core/type_test.cc
index 7d7112021..3a4b8d70d 100644
--- a/test/core/type_test.cc
+++ b/test/core/type_test.cc
@@ -96,32 +96,32 @@ const static TypeTestCase kPrimitiveTypes[] = {
.repr = "boolean",
},
{
- .name = "int32",
- .type = std::make_shared(),
- .type_id = iceberg::TypeId::kInt32,
+ .name = "int",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kInt,
.primitive = true,
- .repr = "int32",
+ .repr = "int",
},
{
- .name = "int64",
- .type = std::make_shared(),
- .type_id = iceberg::TypeId::kInt64,
+ .name = "long",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kLong,
.primitive = true,
- .repr = "int64",
+ .repr = "long",
},
{
- .name = "float32",
- .type = std::make_shared(),
- .type_id = iceberg::TypeId::kFloat32,
+ .name = "float",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kFloat,
.primitive = true,
- .repr = "float32",
+ .repr = "float",
},
{
- .name = "float64",
- .type = std::make_shared(),
- .type_id = iceberg::TypeId::kFloat64,
+ .name = "double",
+ .type = std::make_shared(),
+ .type_id = iceberg::TypeId::kDouble,
.primitive = true,
- .repr = "float64",
+ .repr = "double",
},
{
.name = "decimal9_2",
@@ -206,45 +206,45 @@ const static TypeTestCase kNestedTypes[] = {
{
.name = "list_int",
.type = std::make_shared(
- 1, std::make_shared(), true),
+ 1, std::make_shared(), true),
.type_id = iceberg::TypeId::kList,
.primitive = false,
- .repr = "list",
+ .repr = "list",
},
{
.name = "list_list_int",
.type = std::make_shared