From a3b411745c480f3df5528f17106067728dda4da1 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Fri, 21 Mar 2025 15:33:34 +0800 Subject: [PATCH 1/3] Add pure virtual classes for Catalog, Table, etc. --- example/demo_example.cc | 4 +- src/iceberg/CMakeLists.txt | 2 +- src/iceberg/arrow/demo_arrow.cc | 5 +- src/iceberg/arrow/demo_arrow.h | 4 +- src/iceberg/avro/demo_avro.cc | 2 +- src/iceberg/catalog.h | 178 +++++++++++++++++++++++++ src/iceberg/{demo_table.cc => demo.cc} | 9 +- src/iceberg/{demo_table.h => demo.h} | 12 +- src/iceberg/location_provider.h | 56 ++++++++ src/iceberg/table.h | 78 ++++++++++- src/iceberg/table_identifier.h | 43 ++++++ src/iceberg/table_operations.h | 94 +++++++++++++ src/iceberg/transaction.h | 53 ++++++++ src/iceberg/type_fwd.h | 32 +++++ test/core/core_unittest.cc | 6 +- 15 files changed, 557 insertions(+), 21 deletions(-) create mode 100644 src/iceberg/catalog.h rename src/iceberg/{demo_table.cc => demo.cc} (79%) rename src/iceberg/{demo_table.h => demo.h} (83%) create mode 100644 src/iceberg/location_provider.h create mode 100644 src/iceberg/table_identifier.h create mode 100644 src/iceberg/table_operations.h create mode 100644 src/iceberg/transaction.h diff --git a/example/demo_example.cc b/example/demo_example.cc index f584339ed..ef492bc17 100644 --- a/example/demo_example.cc +++ b/example/demo_example.cc @@ -21,11 +21,11 @@ #include "iceberg/arrow/demo_arrow.h" #include "iceberg/avro/demo_avro.h" -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" #include "iceberg/puffin/demo_puffin.h" int main() { - std::cout << iceberg::DemoTable().print() << std::endl; + std::cout << iceberg::Demo().print() << std::endl; std::cout << iceberg::puffin::DemoPuffin().print() << std::endl; std::cout << iceberg::arrow::DemoArrow().print() << std::endl; std::cout << iceberg::avro::DemoAvro().print() << std::endl; diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 6ecea2116..a182ac3a4 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -19,7 +19,7 @@ set(ICEBERG_INCLUDES "$" "$") set(ICEBERG_SOURCES arrow_c_data_internal.cc - demo_table.cc + demo.cc schema.cc schema_field.cc type.cc) diff --git a/src/iceberg/arrow/demo_arrow.cc b/src/iceberg/arrow/demo_arrow.cc index 22c2b2a5d..e67a0740a 100644 --- a/src/iceberg/arrow/demo_arrow.cc +++ b/src/iceberg/arrow/demo_arrow.cc @@ -21,13 +21,12 @@ #include -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" namespace iceberg::arrow { std::string DemoArrow::print() const { - return DemoTable().print() + - ", Arrow version: " + ::arrow::GetBuildInfo().version_string; + return Demo().print() + ", Arrow version: " + ::arrow::GetBuildInfo().version_string; } } // namespace iceberg::arrow diff --git a/src/iceberg/arrow/demo_arrow.h b/src/iceberg/arrow/demo_arrow.h index 61ac953c5..569458fa3 100644 --- a/src/iceberg/arrow/demo_arrow.h +++ b/src/iceberg/arrow/demo_arrow.h @@ -22,11 +22,11 @@ #include #include "iceberg/arrow/iceberg_arrow_export.h" -#include "iceberg/table.h" +#include "iceberg/demo.h" namespace iceberg::arrow { -class ICEBERG_ARROW_EXPORT DemoArrow : public Table { +class ICEBERG_ARROW_EXPORT DemoArrow : public Demo { public: DemoArrow() = default; ~DemoArrow() override = default; diff --git a/src/iceberg/avro/demo_avro.cc b/src/iceberg/avro/demo_avro.cc index b4bf00ef3..2ff772e99 100644 --- a/src/iceberg/avro/demo_avro.cc +++ b/src/iceberg/avro/demo_avro.cc @@ -23,7 +23,7 @@ #include "avro/Compiler.hh" #include "avro/ValidSchema.hh" -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" namespace iceberg::avro { diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h new file mode 100644 index 000000000..7846e8b5d --- /dev/null +++ b/src/iceberg/catalog.h @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/expected.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief A Catalog API for table create, drop, and load operations. +class ICEBERG_EXPORT Catalog { + public: + virtual ~Catalog() = default; + + /// \brief Return the name for this catalog + virtual std::string name() const = 0; + + /// \brief Return all the identifiers under this namespace + /// + /// \param ns a namespace + /// \return a list of identifiers for tables or Error::kNoSuchNamespace + /// if the namespace does not exist + virtual expected, Error> ListTables( + const Namespace& ns) const = 0; + + /// \brief Create a table + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \param spec a partition spec + /// \param location a location for the table; leave empty if unspecified + /// \param properties a string map of table properties + /// \return a Table instance or Error::kAlreadyExists if the table already exists + virtual expected, Error> CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::map& properties) = 0; + + /// \brief Start a transaction to create a table + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \param spec a partition spec + /// \param location a location for the table; leave empty if unspecified + /// \param properties a string map of table properties + /// \return a Transaction to create the table or Error::kAlreadyExists if the table + /// already exists + virtual expected, Error> NewCreateTableTransaction( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::map& properties) = 0; + + /// \brief Check whether table exists + /// + /// \param identifier a table identifier + /// \return true if the table exists, false otherwise + virtual bool TableExists(const TableIdentifier& identifier) const = 0; + + /// \brief Drop a table; optionally delete data and metadata files + /// + /// If purge is set to true the implementation should delete all data and metadata + /// files. + /// + /// \param identifier a table identifier + /// \param purge if true, delete all data and metadata files in the table + /// \return true if the table was dropped, false if the table did not exist + virtual bool DropTable(const TableIdentifier& identifier, bool purge) = 0; + + /// \brief Load a table + /// + /// \param identifier a table identifier + /// \return instance of Table implementation referred to by identifier or + /// Error::kNoSuchTable if the table does not exist + virtual expected, Error> LoadTable( + const TableIdentifier& identifier) const = 0; + + /// \brief Register a table with the catalog if it does not exist + /// + /// \param identifier a table identifier + /// \param metadata_file_location the location of a metadata file + /// \return a Table instance or Error::kAlreadyExists if the table already exists + virtual expected, Error> RegisterTable( + const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; + + /// \brief Initialize a catalog given a custom name and a map of catalog properties + /// + /// A custom Catalog implementation must have a default constructor. A compute engine + /// will first initialize the catalog without any arguments, and then call this method + /// to complete catalog initialization with properties passed into the engine. + /// + /// \param name a custom name for the catalog + /// \param properties catalog properties + virtual void Initialize(const std::string& name, + const std::map& properties) = 0; + + /// \brief Instantiate a builder to either create a table or start a create/replace + /// transaction + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \return the builder to create a table or start a create/replace transaction + virtual std::unique_ptr BuildTable( + const TableIdentifier& identifier, const Schema& schema) const = 0; + + /// \brief A builder used to create valid tables or start create/replace transactions + class TableBuilder { + public: + virtual ~TableBuilder() = default; + + /// \brief Sets a partition spec for the table + /// + /// \param spec a partition spec + /// \return this for method chaining + virtual TableBuilder& WithPartitionSpec(const PartitionSpec& spec) = 0; + + /// \brief Sets a sort order for the table + /// + /// \param sort_order a sort order + /// \return this for method chaining + virtual TableBuilder& WithSortOrder(const SortOrder& sort_order) = 0; + + /// \brief Sets a location for the table + /// + /// \param location a location + /// \return this for method chaining + virtual TableBuilder& WithLocation(const std::string& location) = 0; + + /// \brief Adds key/value properties to the table + /// + /// \param properties key/value properties + /// \return this for method chaining + virtual TableBuilder& WithProperties( + const std::map& properties) = 0; + + /// \brief Adds a key/value property to the table + /// + /// \param key a key + /// \param value a value + /// \return this for method chaining + virtual TableBuilder& WithProperty(const std::string& key, + const std::string& value) = 0; + + /// \brief Creates the table + /// + /// \return the created table + virtual std::unique_ptr Create() = 0; + + /// \brief Starts a transaction to create the table + /// + /// \return the Transaction to create the table + virtual std::unique_ptr CreateTransaction() = 0; + }; +}; + +} // namespace iceberg diff --git a/src/iceberg/demo_table.cc b/src/iceberg/demo.cc similarity index 79% rename from src/iceberg/demo_table.cc rename to src/iceberg/demo.cc index 9e46bdadc..433d56792 100644 --- a/src/iceberg/demo_table.cc +++ b/src/iceberg/demo.cc @@ -17,13 +17,18 @@ * under the License. */ -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" #include "iceberg/avro.h" // include to export symbols +#include "iceberg/catalog.h" +#include "iceberg/location_provider.h" #include "iceberg/puffin.h" +#include "iceberg/table.h" +#include "iceberg/table_operations.h" +#include "iceberg/transaction.h" namespace iceberg { -std::string DemoTable::print() const { return "DemoTable"; } +std::string Demo::print() const { return "Demo"; } } // namespace iceberg diff --git a/src/iceberg/demo_table.h b/src/iceberg/demo.h similarity index 83% rename from src/iceberg/demo_table.h rename to src/iceberg/demo.h index 2dabaa5ca..7e810f9c7 100644 --- a/src/iceberg/demo_table.h +++ b/src/iceberg/demo.h @@ -19,16 +19,18 @@ #pragma once -#include "iceberg/table.h" +#include + +#include "iceberg/iceberg_export.h" namespace iceberg { -class ICEBERG_EXPORT DemoTable : public Table { +class ICEBERG_EXPORT Demo { public: - DemoTable() = default; - ~DemoTable() override = default; + Demo() = default; + virtual ~Demo() = default; - std::string print() const override; + virtual std::string print() const; }; } // namespace iceberg diff --git a/src/iceberg/location_provider.h b/src/iceberg/location_provider.h new file mode 100644 index 000000000..90c63eb68 --- /dev/null +++ b/src/iceberg/location_provider.h @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief Interface for providing data file locations to write tasks. +class ICEBERG_EXPORT LocationProvider { + public: + virtual ~LocationProvider() = default; + + /// \brief Return a fully-qualified data file location for the given filename. + /// + /// \param filename a file name + /// \return a fully-qualified location URI for a data file + virtual std::string NewDataLocation(const std::string& filename) = 0; + + /// \brief Return a fully-qualified data file location for the given partition and + /// filename. + /// + /// \param spec a partition spec + /// \param partition_data a tuple of partition data for data in the file, matching the + /// given spec + /// \param filename a file name + /// \return a fully-qualified location URI for a data file + /// + /// TODO(wgtmac): StructLike is not well thought yet, we may wrap an ArrowArray + /// with single row in StructLike. + virtual std::string NewDataLocation(const PartitionSpec& spec, + const StructLike& partition_data, + const std::string& filename) = 0; +}; + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 1d3700804..e4a974bb8 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,17 +19,91 @@ #pragma once +#include #include +#include +#include "iceberg/expected.h" #include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" namespace iceberg { -/// \brief The metadata of an Iceberg table. +/// \brief Represents an Iceberg table class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - virtual std::string print() const = 0; + + /// \brief Return the full name for this table + virtual std::string name() const = 0; + + /// \brief Returns the UUID of the table + virtual std::string uuid() const = 0; + + /// \brief Refresh the current table metadata + virtual void Refresh() = 0; + + /// \brief Return the schema for this table + virtual const std::shared_ptr& schema() const = 0; + + /// \brief Return a map of schema for this table + virtual std::map> schemas() const = 0; + + /// \brief Return the partition spec for this table + virtual const std::shared_ptr& spec() const = 0; + + /// \brief Return a map of partition specs for this table + virtual std::map> specs() const = 0; + + /// \brief Return the sort order for this table + virtual const std::shared_ptr& sort_order() const = 0; + + /// \brief Return a map of sort order IDs to sort orders for this table + virtual std::map> sort_orders() const = 0; + + /// \brief Return a map of string properties for this table + virtual std::map properties() const = 0; + + /// \brief Return the table's base location + virtual std::string location() const = 0; + + /// \brief Return the table's current snapshot + virtual const std::shared_ptr& current_snapshot() const = 0; + + /// \brief Get the snapshot of this table with the given id, or null if there is no + /// matching snapshot + /// + /// \param snapshot_id the ID of the snapshot to get + /// \return the Snapshot with the given id + virtual expected, Error> snapshot( + int64_t snapshot_id) const = 0; + + /// \brief Get the snapshots of this table + virtual std::vector> snapshots() const = 0; + + /// \brief Get the snapshot history of this table + /// + /// \return a vector of history entries + virtual std::vector> history() const = 0; + + /// \brief Create a new table scan for this table + /// + /// Once a table scan is created, it can be refined to project columns and filter data. + virtual std::shared_ptr NewScan() const = 0; + + /// \brief Create a new append API to add files to this table and commit + virtual std::shared_ptr NewAppend() = 0; + + /// \brief Create a new transaction API to commit multiple table operations at once + virtual std::unique_ptr NewTransaction() = 0; + + /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an + /// IO-less design in the core library. + // /// \brief Returns a FileIO to read and write table data and metadata files + // virtual std::shared_ptr io() const = 0; + + /// \brief Returns a LocationProvider to provide locations for new data files + virtual std::unique_ptr location_provider() const = 0; }; } // namespace iceberg diff --git a/src/iceberg/table_identifier.h b/src/iceberg/table_identifier.h new file mode 100644 index 000000000..b6571275c --- /dev/null +++ b/src/iceberg/table_identifier.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/table_identifier.h +/// A TableIdentifier is a unique identifier for a table + +#include +#include + +#include "iceberg/iceberg_export.h" + +namespace iceberg { + +/// \brief A namespace in a catalog. +struct ICEBERG_EXPORT Namespace { + std::vector levels; +}; + +/// \brief Identifies a table in iceberg catalog. +struct ICEBERG_EXPORT TableIdentifier { + Namespace name_space; + std::string name; +}; + +} // namespace iceberg diff --git a/src/iceberg/table_operations.h b/src/iceberg/table_operations.h new file mode 100644 index 000000000..6c6f07446 --- /dev/null +++ b/src/iceberg/table_operations.h @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/expected.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief Interface for table operations including metadata refresh and commit +class ICEBERG_EXPORT TableOperations { + public: + virtual ~TableOperations() = default; + + /// \brief Return the currently loaded table metadata, without checking for updates. + virtual std::shared_ptr Current() = 0; + + /// \brief Return the current table metadata after checking for updates. + virtual std::shared_ptr Refresh() = 0; + + /// \brief Replace the base table metadata with a new version. + /// + /// This method should implement and document atomicity guarantees. + /// + /// Implementations must check that the base metadata is current to avoid overwriting + /// updates. Once the atomic commit operation succeeds, implementations must not perform + /// any operations that may fail because failure in this method cannot be distinguished + /// from commit failure. + /// + /// Implementations must return Error::kCommitStateUnknown in cases where it cannot be + /// determined if the commit succeeded or failed. For example if a network partition + /// causes the confirmation of the commit to be lost, the implementation should return + /// Error::kCommitStateUnknown. This is important because downstream users of this API + /// need to know whether they can clean up the commit or not, if the state is unknown + /// then it is not safe to remove any files. All other exceptions will be treated as if + /// the commit has failed. + /// + /// \param base table metadata on which changes were based + /// \param metadata new table metadata with updates + virtual expected Commit(const TableMetadata& base, + const TableMetadata& metadata) = 0; + + /// \brief Returns an FileIO to read and write table data and metadata files. + // virtual std::shared_ptr io() const = 0; + + /// \brief Given the name of a metadata file, obtain the full path of that file using an + /// appropriate base location of the implementation's choosing. + virtual std::string MetadataFileLocation(const std::string& fileName) = 0; + + /// \brief Returns a LocationProvider that supplies locations for new new data files. + /// + /// \return a location provider configured for the current table state + virtual std::shared_ptr location_provider() const = 0; + + /// \brief Return a temporary TableOperations instance that uses configuration from + /// uncommitted metadata. + /// + /// This is called by transactions when uncommitted table metadata should be used; for + /// example, to create a metadata file location based on metadata in the transaction + /// that has not been committed. + /// + /// Transactions will not call `Refresh()` or `Commit()`. + /// + /// \param uncommittedMetadata uncommitted table metadata + /// \return a temporary table operations that behaves like the uncommitted metadata is + /// current + virtual std::unique_ptr Temp( + const TableMetadata& uncommittedMetadata) = 0; + + /// \brief Create a new ID for a Snapshot + virtual int64_t NewSnapshotId() = 0; +}; + +} // namespace iceberg diff --git a/src/iceberg/transaction.h b/src/iceberg/transaction.h new file mode 100644 index 000000000..0149f329d --- /dev/null +++ b/src/iceberg/transaction.h @@ -0,0 +1,53 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief A transaction for performing multiple updates to a table +class ICEBERG_EXPORT Transaction { + public: + virtual ~Transaction() = default; + + /// \brief Return the Table that this transaction will update + /// + /// \return this transaction's table + virtual const std::shared_ptr
& table() const = 0; + + /// \brief Create a new append API to add files to this table + /// + /// \return a new AppendFiles + virtual std::shared_ptr NewAppend() = 0; + + /// \brief Apply the pending changes from all actions and commit + /// + /// May throw ValidationException if any update cannot be applied to the current table + /// metadata. May throw CommitFailedException if the updates cannot be committed due to + /// conflicts. + virtual void CommitTransaction() = 0; +}; + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 89043938b..b56a05f46 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -81,4 +81,36 @@ class TimestampTzType; class Type; class UuidType; +/// \brief Error types for iceberg. +/// TODO: add more and sort them based on some rules. +enum class Error { + kNoSuchNamespace, + kAlreadyExists, + kNoSuchTable, + kCommitStateUnknown, +}; + +struct Namespace; +struct TableIdentifier; + +class Catalog; +class LocationProvider; +class Table; +class TableOperations; +class Transaction; + +/// ---------------------------------------------------------------------------- +/// TODO: Forward declarations below are not added yet. +/// ---------------------------------------------------------------------------- + +class HistoryEntry; +class PartitionSpec; +class Snapshot; +class SortOrder; +class StructLike; +class TableMetadata; + +class AppendFiles; +class TableScan; + } // namespace iceberg diff --git a/test/core/core_unittest.cc b/test/core/core_unittest.cc index 501f73df0..ddd9d509a 100644 --- a/test/core/core_unittest.cc +++ b/test/core/core_unittest.cc @@ -19,9 +19,9 @@ #include -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" TEST(TableTest, TestTableCons) { - auto table = iceberg::DemoTable(); - EXPECT_EQ(table.print(), "DemoTable"); + auto table = iceberg::Demo(); + EXPECT_EQ(table.print(), "Demo"); } From 1aaa348d5a427b8b13d84181a089fa4789eb4408 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Fri, 21 Mar 2025 16:40:21 +0800 Subject: [PATCH 2/3] use const ref and unique_ptr where possible --- src/iceberg/catalog.h | 13 +++++++------ src/iceberg/table.h | 23 ++++++++++++----------- src/iceberg/table_identifier.h | 2 +- src/iceberg/table_operations.h | 11 ++++++----- src/iceberg/type_fwd.h | 2 +- 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h index 7846e8b5d..c952ba569 100644 --- a/src/iceberg/catalog.h +++ b/src/iceberg/catalog.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "iceberg/expected.h" @@ -36,14 +37,14 @@ class ICEBERG_EXPORT Catalog { virtual ~Catalog() = default; /// \brief Return the name for this catalog - virtual std::string name() const = 0; + virtual std::string_view name() const = 0; /// \brief Return all the identifiers under this namespace /// /// \param ns a namespace /// \return a list of identifiers for tables or Error::kNoSuchNamespace /// if the namespace does not exist - virtual expected, Error> ListTables( + virtual expected, ErrorKind> ListTables( const Namespace& ns) const = 0; /// \brief Create a table @@ -54,7 +55,7 @@ class ICEBERG_EXPORT Catalog { /// \param location a location for the table; leave empty if unspecified /// \param properties a string map of table properties /// \return a Table instance or Error::kAlreadyExists if the table already exists - virtual expected, Error> CreateTable( + virtual expected, ErrorKind> CreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::map& properties) = 0; @@ -68,7 +69,7 @@ class ICEBERG_EXPORT Catalog { /// \param properties a string map of table properties /// \return a Transaction to create the table or Error::kAlreadyExists if the table /// already exists - virtual expected, Error> NewCreateTableTransaction( + virtual expected, ErrorKind> NewCreateTableTransaction( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::map& properties) = 0; @@ -94,7 +95,7 @@ class ICEBERG_EXPORT Catalog { /// \param identifier a table identifier /// \return instance of Table implementation referred to by identifier or /// Error::kNoSuchTable if the table does not exist - virtual expected, Error> LoadTable( + virtual expected, ErrorKind> LoadTable( const TableIdentifier& identifier) const = 0; /// \brief Register a table with the catalog if it does not exist @@ -102,7 +103,7 @@ class ICEBERG_EXPORT Catalog { /// \param identifier a table identifier /// \param metadata_file_location the location of a metadata file /// \return a Table instance or Error::kAlreadyExists if the table already exists - virtual expected, Error> RegisterTable( + virtual expected, ErrorKind> RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; /// \brief Initialize a catalog given a custom name and a map of catalog properties diff --git a/src/iceberg/table.h b/src/iceberg/table.h index e4a974bb8..0dd6168fe 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include @@ -35,10 +36,10 @@ class ICEBERG_EXPORT Table { virtual ~Table() = default; /// \brief Return the full name for this table - virtual std::string name() const = 0; + virtual const std::string& name() const = 0; /// \brief Returns the UUID of the table - virtual std::string uuid() const = 0; + virtual const std::string& uuid() const = 0; /// \brief Refresh the current table metadata virtual void Refresh() = 0; @@ -47,25 +48,25 @@ class ICEBERG_EXPORT Table { virtual const std::shared_ptr& schema() const = 0; /// \brief Return a map of schema for this table - virtual std::map> schemas() const = 0; + virtual const std::map>& schemas() const = 0; /// \brief Return the partition spec for this table virtual const std::shared_ptr& spec() const = 0; /// \brief Return a map of partition specs for this table - virtual std::map> specs() const = 0; + virtual const std::map>& specs() const = 0; /// \brief Return the sort order for this table virtual const std::shared_ptr& sort_order() const = 0; /// \brief Return a map of sort order IDs to sort orders for this table - virtual std::map> sort_orders() const = 0; + virtual const std::map>& sort_orders() const = 0; /// \brief Return a map of string properties for this table - virtual std::map properties() const = 0; + virtual const std::map& properties() const = 0; /// \brief Return the table's base location - virtual std::string location() const = 0; + virtual const std::string& location() const = 0; /// \brief Return the table's current snapshot virtual const std::shared_ptr& current_snapshot() const = 0; @@ -75,21 +76,21 @@ class ICEBERG_EXPORT Table { /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual expected, Error> snapshot( + virtual expected, ErrorKind> snapshot( int64_t snapshot_id) const = 0; /// \brief Get the snapshots of this table - virtual std::vector> snapshots() const = 0; + virtual const std::vector>& snapshots() const = 0; /// \brief Get the snapshot history of this table /// /// \return a vector of history entries - virtual std::vector> history() const = 0; + virtual const std::vector>& history() const = 0; /// \brief Create a new table scan for this table /// /// Once a table scan is created, it can be refined to project columns and filter data. - virtual std::shared_ptr NewScan() const = 0; + virtual std::unique_ptr NewScan() const = 0; /// \brief Create a new append API to add files to this table and commit virtual std::shared_ptr NewAppend() = 0; diff --git a/src/iceberg/table_identifier.h b/src/iceberg/table_identifier.h index b6571275c..9aa5770a1 100644 --- a/src/iceberg/table_identifier.h +++ b/src/iceberg/table_identifier.h @@ -36,7 +36,7 @@ struct ICEBERG_EXPORT Namespace { /// \brief Identifies a table in iceberg catalog. struct ICEBERG_EXPORT TableIdentifier { - Namespace name_space; + Namespace ns; std::string name; }; diff --git a/src/iceberg/table_operations.h b/src/iceberg/table_operations.h index 6c6f07446..ed46b9b1e 100644 --- a/src/iceberg/table_operations.h +++ b/src/iceberg/table_operations.h @@ -19,6 +19,7 @@ #pragma once +#include #include #include "iceberg/expected.h" @@ -33,10 +34,10 @@ class ICEBERG_EXPORT TableOperations { virtual ~TableOperations() = default; /// \brief Return the currently loaded table metadata, without checking for updates. - virtual std::shared_ptr Current() = 0; + virtual const std::shared_ptr& Current() = 0; /// \brief Return the current table metadata after checking for updates. - virtual std::shared_ptr Refresh() = 0; + virtual const std::shared_ptr& Refresh() = 0; /// \brief Replace the base table metadata with a new version. /// @@ -57,8 +58,8 @@ class ICEBERG_EXPORT TableOperations { /// /// \param base table metadata on which changes were based /// \param metadata new table metadata with updates - virtual expected Commit(const TableMetadata& base, - const TableMetadata& metadata) = 0; + virtual expected Commit(const TableMetadata& base, + const TableMetadata& metadata) = 0; /// \brief Returns an FileIO to read and write table data and metadata files. // virtual std::shared_ptr io() const = 0; @@ -70,7 +71,7 @@ class ICEBERG_EXPORT TableOperations { /// \brief Returns a LocationProvider that supplies locations for new new data files. /// /// \return a location provider configured for the current table state - virtual std::shared_ptr location_provider() const = 0; + virtual std::unique_ptr location_provider() const = 0; /// \brief Return a temporary TableOperations instance that uses configuration from /// uncommitted metadata. diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index b56a05f46..b2ed62e2c 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -83,7 +83,7 @@ class UuidType; /// \brief Error types for iceberg. /// TODO: add more and sort them based on some rules. -enum class Error { +enum class ErrorKind { kNoSuchNamespace, kAlreadyExists, kNoSuchTable, From 2d7ceaa67ee694b4b62d9de3fd5f4634e73caf12 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 25 Mar 2025 14:42:11 +0800 Subject: [PATCH 3/3] remove TableOperations --- src/iceberg/catalog.h | 37 +++++++++---- src/iceberg/demo.cc | 1 - src/iceberg/error.h | 43 +++++++++++++++ src/iceberg/table.h | 5 +- src/iceberg/table_operations.h | 95 ---------------------------------- src/iceberg/type_fwd.h | 13 ++--- 6 files changed, 75 insertions(+), 119 deletions(-) create mode 100644 src/iceberg/error.h delete mode 100644 src/iceberg/table_operations.h diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h index c952ba569..8ab51dd0b 100644 --- a/src/iceberg/catalog.h +++ b/src/iceberg/catalog.h @@ -25,6 +25,7 @@ #include #include +#include "iceberg/error.h" #include "iceberg/expected.h" #include "iceberg/table_identifier.h" #include "iceberg/type_fwd.h" @@ -32,6 +33,9 @@ namespace iceberg { /// \brief A Catalog API for table create, drop, and load operations. +/// +/// Note that these functions are named after the corresponding operationId +/// specified by the Iceberg Rest Catalog API. class ICEBERG_EXPORT Catalog { public: virtual ~Catalog() = default; @@ -42,9 +46,9 @@ class ICEBERG_EXPORT Catalog { /// \brief Return all the identifiers under this namespace /// /// \param ns a namespace - /// \return a list of identifiers for tables or Error::kNoSuchNamespace + /// \return a list of identifiers for tables or ErrorKind::kNoSuchNamespace /// if the namespace does not exist - virtual expected, ErrorKind> ListTables( + virtual expected, Error> ListTables( const Namespace& ns) const = 0; /// \brief Create a table @@ -54,12 +58,23 @@ class ICEBERG_EXPORT Catalog { /// \param spec a partition spec /// \param location a location for the table; leave empty if unspecified /// \param properties a string map of table properties - /// \return a Table instance or Error::kAlreadyExists if the table already exists - virtual expected, ErrorKind> CreateTable( + /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists + virtual expected, Error> CreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::map& properties) = 0; + /// \brief Update a table + /// + /// \param identifier a table identifier + /// \param requirements a list of table requirements + /// \param updates a list of table updates + /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists + virtual expected, Error> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) = 0; + /// \brief Start a transaction to create a table /// /// \param identifier a table identifier @@ -67,9 +82,9 @@ class ICEBERG_EXPORT Catalog { /// \param spec a partition spec /// \param location a location for the table; leave empty if unspecified /// \param properties a string map of table properties - /// \return a Transaction to create the table or Error::kAlreadyExists if the table + /// \return a Transaction to create the table or ErrorKind::kAlreadyExists if the table /// already exists - virtual expected, ErrorKind> NewCreateTableTransaction( + virtual expected, Error> StageCreateTable( const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, const std::string& location, const std::map& properties) = 0; @@ -94,16 +109,16 @@ class ICEBERG_EXPORT Catalog { /// /// \param identifier a table identifier /// \return instance of Table implementation referred to by identifier or - /// Error::kNoSuchTable if the table does not exist - virtual expected, ErrorKind> LoadTable( + /// ErrorKind::kNoSuchTable if the table does not exist + virtual expected, Error> LoadTable( const TableIdentifier& identifier) const = 0; /// \brief Register a table with the catalog if it does not exist /// /// \param identifier a table identifier /// \param metadata_file_location the location of a metadata file - /// \return a Table instance or Error::kAlreadyExists if the table already exists - virtual expected, ErrorKind> RegisterTable( + /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists + virtual expected, Error> RegisterTable( const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; /// \brief Initialize a catalog given a custom name and a map of catalog properties @@ -172,7 +187,7 @@ class ICEBERG_EXPORT Catalog { /// \brief Starts a transaction to create the table /// /// \return the Transaction to create the table - virtual std::unique_ptr CreateTransaction() = 0; + virtual std::unique_ptr StageCreate() = 0; }; }; diff --git a/src/iceberg/demo.cc b/src/iceberg/demo.cc index 433d56792..c967915fb 100644 --- a/src/iceberg/demo.cc +++ b/src/iceberg/demo.cc @@ -24,7 +24,6 @@ #include "iceberg/location_provider.h" #include "iceberg/puffin.h" #include "iceberg/table.h" -#include "iceberg/table_operations.h" #include "iceberg/transaction.h" namespace iceberg { diff --git a/src/iceberg/error.h b/src/iceberg/error.h new file mode 100644 index 000000000..77414f900 --- /dev/null +++ b/src/iceberg/error.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" + +namespace iceberg { + +/// \brief Error types for iceberg. +/// TODO: add more and sort them based on some rules. +enum class ErrorKind { + kNoSuchNamespace, + kAlreadyExists, + kNoSuchTable, + kCommitStateUnknown, +}; + +/// \brief Error with a kind and a message. +struct ICEBERG_EXPORT [[nodiscard]] Error { + ErrorKind kind; + std::string message; +}; + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 0dd6168fe..a62b41b57 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -24,6 +24,7 @@ #include #include +#include "iceberg/error.h" #include "iceberg/expected.h" #include "iceberg/iceberg_export.h" #include "iceberg/type_fwd.h" @@ -42,7 +43,7 @@ class ICEBERG_EXPORT Table { virtual const std::string& uuid() const = 0; /// \brief Refresh the current table metadata - virtual void Refresh() = 0; + virtual expected Refresh() = 0; /// \brief Return the schema for this table virtual const std::shared_ptr& schema() const = 0; @@ -76,7 +77,7 @@ class ICEBERG_EXPORT Table { /// /// \param snapshot_id the ID of the snapshot to get /// \return the Snapshot with the given id - virtual expected, ErrorKind> snapshot( + virtual expected, Error> snapshot( int64_t snapshot_id) const = 0; /// \brief Get the snapshots of this table diff --git a/src/iceberg/table_operations.h b/src/iceberg/table_operations.h deleted file mode 100644 index ed46b9b1e..000000000 --- a/src/iceberg/table_operations.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#pragma once - -#include -#include - -#include "iceberg/expected.h" -#include "iceberg/iceberg_export.h" -#include "iceberg/type_fwd.h" - -namespace iceberg { - -/// \brief Interface for table operations including metadata refresh and commit -class ICEBERG_EXPORT TableOperations { - public: - virtual ~TableOperations() = default; - - /// \brief Return the currently loaded table metadata, without checking for updates. - virtual const std::shared_ptr& Current() = 0; - - /// \brief Return the current table metadata after checking for updates. - virtual const std::shared_ptr& Refresh() = 0; - - /// \brief Replace the base table metadata with a new version. - /// - /// This method should implement and document atomicity guarantees. - /// - /// Implementations must check that the base metadata is current to avoid overwriting - /// updates. Once the atomic commit operation succeeds, implementations must not perform - /// any operations that may fail because failure in this method cannot be distinguished - /// from commit failure. - /// - /// Implementations must return Error::kCommitStateUnknown in cases where it cannot be - /// determined if the commit succeeded or failed. For example if a network partition - /// causes the confirmation of the commit to be lost, the implementation should return - /// Error::kCommitStateUnknown. This is important because downstream users of this API - /// need to know whether they can clean up the commit or not, if the state is unknown - /// then it is not safe to remove any files. All other exceptions will be treated as if - /// the commit has failed. - /// - /// \param base table metadata on which changes were based - /// \param metadata new table metadata with updates - virtual expected Commit(const TableMetadata& base, - const TableMetadata& metadata) = 0; - - /// \brief Returns an FileIO to read and write table data and metadata files. - // virtual std::shared_ptr io() const = 0; - - /// \brief Given the name of a metadata file, obtain the full path of that file using an - /// appropriate base location of the implementation's choosing. - virtual std::string MetadataFileLocation(const std::string& fileName) = 0; - - /// \brief Returns a LocationProvider that supplies locations for new new data files. - /// - /// \return a location provider configured for the current table state - virtual std::unique_ptr location_provider() const = 0; - - /// \brief Return a temporary TableOperations instance that uses configuration from - /// uncommitted metadata. - /// - /// This is called by transactions when uncommitted table metadata should be used; for - /// example, to create a metadata file location based on metadata in the transaction - /// that has not been committed. - /// - /// Transactions will not call `Refresh()` or `Commit()`. - /// - /// \param uncommittedMetadata uncommitted table metadata - /// \return a temporary table operations that behaves like the uncommitted metadata is - /// current - virtual std::unique_ptr Temp( - const TableMetadata& uncommittedMetadata) = 0; - - /// \brief Create a new ID for a Snapshot - virtual int64_t NewSnapshotId() = 0; -}; - -} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index b2ed62e2c..bcbf84b72 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -81,22 +81,12 @@ class TimestampTzType; class Type; class UuidType; -/// \brief Error types for iceberg. -/// TODO: add more and sort them based on some rules. -enum class ErrorKind { - kNoSuchNamespace, - kAlreadyExists, - kNoSuchTable, - kCommitStateUnknown, -}; - struct Namespace; struct TableIdentifier; class Catalog; class LocationProvider; class Table; -class TableOperations; class Transaction; /// ---------------------------------------------------------------------------- @@ -110,6 +100,9 @@ class SortOrder; class StructLike; class TableMetadata; +class MetadataUpdate; +class UpdateRequirement; + class AppendFiles; class TableScan;