diff --git a/example/demo_example.cc b/example/demo_example.cc index f584339ed..ef492bc17 100644 --- a/example/demo_example.cc +++ b/example/demo_example.cc @@ -21,11 +21,11 @@ #include "iceberg/arrow/demo_arrow.h" #include "iceberg/avro/demo_avro.h" -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" #include "iceberg/puffin/demo_puffin.h" int main() { - std::cout << iceberg::DemoTable().print() << std::endl; + std::cout << iceberg::Demo().print() << std::endl; std::cout << iceberg::puffin::DemoPuffin().print() << std::endl; std::cout << iceberg::arrow::DemoArrow().print() << std::endl; std::cout << iceberg::avro::DemoAvro().print() << std::endl; diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 6ecea2116..a182ac3a4 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -19,7 +19,7 @@ set(ICEBERG_INCLUDES "$" "$") set(ICEBERG_SOURCES arrow_c_data_internal.cc - demo_table.cc + demo.cc schema.cc schema_field.cc type.cc) diff --git a/src/iceberg/arrow/demo_arrow.cc b/src/iceberg/arrow/demo_arrow.cc index 22c2b2a5d..e67a0740a 100644 --- a/src/iceberg/arrow/demo_arrow.cc +++ b/src/iceberg/arrow/demo_arrow.cc @@ -21,13 +21,12 @@ #include -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" namespace iceberg::arrow { std::string DemoArrow::print() const { - return DemoTable().print() + - ", Arrow version: " + ::arrow::GetBuildInfo().version_string; + return Demo().print() + ", Arrow version: " + ::arrow::GetBuildInfo().version_string; } } // namespace iceberg::arrow diff --git a/src/iceberg/arrow/demo_arrow.h b/src/iceberg/arrow/demo_arrow.h index 61ac953c5..569458fa3 100644 --- a/src/iceberg/arrow/demo_arrow.h +++ b/src/iceberg/arrow/demo_arrow.h @@ -22,11 +22,11 @@ #include #include "iceberg/arrow/iceberg_arrow_export.h" -#include "iceberg/table.h" +#include "iceberg/demo.h" namespace iceberg::arrow { -class ICEBERG_ARROW_EXPORT DemoArrow : public Table { +class ICEBERG_ARROW_EXPORT DemoArrow : public Demo { 
public: DemoArrow() = default; ~DemoArrow() override = default; diff --git a/src/iceberg/avro/demo_avro.cc b/src/iceberg/avro/demo_avro.cc index b4bf00ef3..2ff772e99 100644 --- a/src/iceberg/avro/demo_avro.cc +++ b/src/iceberg/avro/demo_avro.cc @@ -23,7 +23,7 @@ #include "avro/Compiler.hh" #include "avro/ValidSchema.hh" -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" namespace iceberg::avro { diff --git a/src/iceberg/catalog.h b/src/iceberg/catalog.h new file mode 100644 index 000000000..8ab51dd0b --- /dev/null +++ b/src/iceberg/catalog.h @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief A Catalog API for table create, drop, and load operations. +/// +/// Note that these functions are named after the corresponding operationId +/// specified by the Iceberg REST Catalog API. 
+class ICEBERG_EXPORT Catalog { + public: + virtual ~Catalog() = default; + + /// \brief Return the name for this catalog + virtual std::string_view name() const = 0; + + /// \brief Return all the identifiers under this namespace + /// + /// \param ns a namespace + /// \return a list of identifiers for tables or ErrorKind::kNoSuchNamespace + /// if the namespace does not exist + virtual expected, Error> ListTables( + const Namespace& ns) const = 0; + + /// \brief Create a table + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \param spec a partition spec + /// \param location a location for the table; leave empty if unspecified + /// \param properties a string map of table properties + /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists + virtual expected, Error> CreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, + const std::map& properties) = 0; + + /// \brief Update a table + /// + /// \param identifier a table identifier + /// \param requirements a list of table requirements + /// \param updates a list of table updates + /// \return a Table instance or ErrorKind::kNoSuchTable if the table does not exist + virtual expected, Error> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) = 0; + + /// \brief Start a transaction to create a table + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \param spec a partition spec + /// \param location a location for the table; leave empty if unspecified + /// \param properties a string map of table properties + /// \return a Transaction to create the table or ErrorKind::kAlreadyExists if the table + /// already exists + virtual expected, Error> StageCreateTable( + const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec, + const std::string& location, 
const std::map& properties) = 0; + + /// \brief Check whether a table exists + /// + /// \param identifier a table identifier + /// \return true if the table exists, false otherwise + virtual bool TableExists(const TableIdentifier& identifier) const = 0; + + /// \brief Drop a table; optionally delete data and metadata files + /// + /// If purge is set to true the implementation should delete all data and metadata + /// files. + /// + /// \param identifier a table identifier + /// \param purge if true, delete all data and metadata files in the table + /// \return true if the table was dropped, false if the table did not exist + virtual bool DropTable(const TableIdentifier& identifier, bool purge) = 0; + + /// \brief Load a table + /// + /// \param identifier a table identifier + /// \return instance of Table implementation referred to by identifier or + /// ErrorKind::kNoSuchTable if the table does not exist + virtual expected, Error> LoadTable( + const TableIdentifier& identifier) const = 0; + + /// \brief Register a table with the catalog if it does not exist + /// + /// \param identifier a table identifier + /// \param metadata_file_location the location of a metadata file + /// \return a Table instance or ErrorKind::kAlreadyExists if the table already exists + virtual expected, Error> RegisterTable( + const TableIdentifier& identifier, const std::string& metadata_file_location) = 0; + + /// \brief Initialize a catalog given a custom name and a map of catalog properties + /// + /// A custom Catalog implementation must have a default constructor. A compute engine + /// will first initialize the catalog without any arguments, and then call this method + /// to complete catalog initialization with properties passed into the engine. 
+ /// + /// \param name a custom name for the catalog + /// \param properties catalog properties + virtual void Initialize(const std::string& name, + const std::map& properties) = 0; + + /// \brief Instantiate a builder to either create a table or start a create/replace + /// transaction + /// + /// \param identifier a table identifier + /// \param schema a schema + /// \return the builder to create a table or start a create/replace transaction + virtual std::unique_ptr BuildTable( + const TableIdentifier& identifier, const Schema& schema) const = 0; + + /// \brief A builder used to create valid tables or start create/replace transactions + class TableBuilder { + public: + virtual ~TableBuilder() = default; + + /// \brief Sets a partition spec for the table + /// + /// \param spec a partition spec + /// \return this for method chaining + virtual TableBuilder& WithPartitionSpec(const PartitionSpec& spec) = 0; + + /// \brief Sets a sort order for the table + /// + /// \param sort_order a sort order + /// \return this for method chaining + virtual TableBuilder& WithSortOrder(const SortOrder& sort_order) = 0; + + /// \brief Sets a location for the table + /// + /// \param location a location + /// \return this for method chaining + virtual TableBuilder& WithLocation(const std::string& location) = 0; + + /// \brief Adds key/value properties to the table + /// + /// \param properties key/value properties + /// \return this for method chaining + virtual TableBuilder& WithProperties( + const std::map& properties) = 0; + + /// \brief Adds a key/value property to the table + /// + /// \param key a key + /// \param value a value + /// \return this for method chaining + virtual TableBuilder& WithProperty(const std::string& key, + const std::string& value) = 0; + + /// \brief Creates the table + /// + /// \return the created table + virtual std::unique_ptr Create() = 0; + + /// \brief Starts a transaction to create the table + /// + /// \return the Transaction to create the 
table + virtual std::unique_ptr StageCreate() = 0; + }; +}; + +} // namespace iceberg diff --git a/src/iceberg/demo_table.cc b/src/iceberg/demo.cc similarity index 82% rename from src/iceberg/demo_table.cc rename to src/iceberg/demo.cc index 9e46bdadc..c967915fb 100644 --- a/src/iceberg/demo_table.cc +++ b/src/iceberg/demo.cc @@ -17,13 +17,17 @@ * under the License. */ -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" #include "iceberg/avro.h" // include to export symbols +#include "iceberg/catalog.h" +#include "iceberg/location_provider.h" #include "iceberg/puffin.h" +#include "iceberg/table.h" +#include "iceberg/transaction.h" namespace iceberg { -std::string DemoTable::print() const { return "DemoTable"; } +std::string Demo::print() const { return "Demo"; } } // namespace iceberg diff --git a/src/iceberg/demo_table.h b/src/iceberg/demo.h similarity index 83% rename from src/iceberg/demo_table.h rename to src/iceberg/demo.h index 2dabaa5ca..7e810f9c7 100644 --- a/src/iceberg/demo_table.h +++ b/src/iceberg/demo.h @@ -19,16 +19,18 @@ #pragma once -#include "iceberg/table.h" +#include + +#include "iceberg/iceberg_export.h" namespace iceberg { -class ICEBERG_EXPORT DemoTable : public Table { +class ICEBERG_EXPORT Demo { public: - DemoTable() = default; - ~DemoTable() override = default; + Demo() = default; + virtual ~Demo() = default; - std::string print() const override; + virtual std::string print() const; }; } // namespace iceberg diff --git a/src/iceberg/error.h b/src/iceberg/error.h new file mode 100644 index 000000000..77414f900 --- /dev/null +++ b/src/iceberg/error.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" + +namespace iceberg { + +/// \brief Error kinds for Iceberg. +/// TODO: add more and sort them based on some rules. +enum class ErrorKind { + kNoSuchNamespace, + kAlreadyExists, + kNoSuchTable, + kCommitStateUnknown, +}; + +/// \brief Error with a kind and a message. +struct ICEBERG_EXPORT [[nodiscard]] Error { + ErrorKind kind; + std::string message; +}; + +} // namespace iceberg diff --git a/src/iceberg/location_provider.h b/src/iceberg/location_provider.h new file mode 100644 index 000000000..90c63eb68 --- /dev/null +++ b/src/iceberg/location_provider.h @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief Interface for providing data file locations to write tasks. +class ICEBERG_EXPORT LocationProvider { + public: + virtual ~LocationProvider() = default; + + /// \brief Return a fully-qualified data file location for the given filename. + /// + /// \param filename a file name + /// \return a fully-qualified location URI for a data file + virtual std::string NewDataLocation(const std::string& filename) = 0; + + /// \brief Return a fully-qualified data file location for the given partition and + /// filename. + /// + /// \param spec a partition spec + /// \param partition_data a tuple of partition data for data in the file, matching the + /// given spec + /// \param filename a file name + /// \return a fully-qualified location URI for a data file + /// + /// TODO(wgtmac): StructLike is not well thought out yet, we may wrap an ArrowArray + /// with single row in StructLike. + virtual std::string NewDataLocation(const PartitionSpec& spec, + const StructLike& partition_data, + const std::string& filename) = 0; +}; + +} // namespace iceberg diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 1d3700804..a62b41b57 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -19,17 +19,93 @@ #pragma once +#include +#include #include +#include +#include "iceberg/error.h" +#include "iceberg/expected.h" #include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" namespace iceberg { -/// \brief The metadata of an Iceberg table. 
+/// \brief Represents an Iceberg table class ICEBERG_EXPORT Table { public: virtual ~Table() = default; - virtual std::string print() const = 0; + + /// \brief Return the full name for this table + virtual const std::string& name() const = 0; + + /// \brief Returns the UUID of the table + virtual const std::string& uuid() const = 0; + + /// \brief Refresh the current table metadata + virtual expected Refresh() = 0; + + /// \brief Return the schema for this table + virtual const std::shared_ptr& schema() const = 0; + + /// \brief Return a map of schemas for this table + virtual const std::map>& schemas() const = 0; + + /// \brief Return the partition spec for this table + virtual const std::shared_ptr& spec() const = 0; + + /// \brief Return a map of partition specs for this table + virtual const std::map>& specs() const = 0; + + /// \brief Return the sort order for this table + virtual const std::shared_ptr& sort_order() const = 0; + + /// \brief Return a map of sort order IDs to sort orders for this table + virtual const std::map>& sort_orders() const = 0; + + /// \brief Return a map of string properties for this table + virtual const std::map& properties() const = 0; + + /// \brief Return the table's base location + virtual const std::string& location() const = 0; + + /// \brief Return the table's current snapshot + virtual const std::shared_ptr& current_snapshot() const = 0; + + /// \brief Get the snapshot of this table with the given id, or null if there is no + /// matching snapshot + /// + /// \param snapshot_id the ID of the snapshot to get + /// \return the Snapshot with the given id + virtual expected, Error> snapshot( + int64_t snapshot_id) const = 0; + + /// \brief Get the snapshots of this table + virtual const std::vector>& snapshots() const = 0; + + /// \brief Get the snapshot history of this table + /// + /// \return a vector of history entries + virtual const std::vector>& history() const = 0; + + /// \brief Create a new table scan for this table + 
/// + /// Once a table scan is created, it can be refined to project columns and filter data. + virtual std::unique_ptr NewScan() const = 0; + + /// \brief Create a new append API to add files to this table and commit + virtual std::shared_ptr NewAppend() = 0; + + /// \brief Create a new transaction API to commit multiple table operations at once + virtual std::unique_ptr NewTransaction() = 0; + + /// TODO(wgtmac): design of FileIO is not finalized yet. We intend to use an + /// IO-less design in the core library. + // /// \brief Returns a FileIO to read and write table data and metadata files + // virtual std::shared_ptr io() const = 0; + + /// \brief Returns a LocationProvider to provide locations for new data files + virtual std::unique_ptr location_provider() const = 0; }; } // namespace iceberg diff --git a/src/iceberg/table_identifier.h b/src/iceberg/table_identifier.h new file mode 100644 index 000000000..9aa5770a1 --- /dev/null +++ b/src/iceberg/table_identifier.h @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +/// \file iceberg/table_identifier.h +/// A TableIdentifier is a unique identifier for a table + +#include +#include + +#include "iceberg/iceberg_export.h" + +namespace iceberg { + +/// \brief A namespace in a catalog. +struct ICEBERG_EXPORT Namespace { + std::vector levels; +}; + +/// \brief Identifies a table in an Iceberg catalog. +struct ICEBERG_EXPORT TableIdentifier { + Namespace ns; + std::string name; +}; + +} // namespace iceberg diff --git a/src/iceberg/transaction.h b/src/iceberg/transaction.h new file mode 100644 index 000000000..0149f329d --- /dev/null +++ b/src/iceberg/transaction.h @@ -0,0 +1,53 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief A transaction for performing multiple updates to a table +class ICEBERG_EXPORT Transaction { + public: + virtual ~Transaction() = default; + + /// \brief Return the Table that this transaction will update + /// + /// \return this transaction's table + virtual const std::shared_ptr
& table() const = 0; + + /// \brief Create a new append API to add files to this table + /// + /// \return a new AppendFiles + virtual std::shared_ptr NewAppend() = 0; + + /// \brief Apply the pending changes from all actions and commit + /// + /// May throw ValidationException if any update cannot be applied to the current table + /// metadata. May throw CommitFailedException if the updates cannot be committed due to + /// conflicts. + virtual void CommitTransaction() = 0; +}; + +} // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 89043938b..bcbf84b72 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -81,4 +81,29 @@ class TimestampTzType; class Type; class UuidType; +struct Namespace; +struct TableIdentifier; + +class Catalog; +class LocationProvider; +class Table; +class Transaction; + +/// ---------------------------------------------------------------------------- +/// TODO: Forward declarations below are not added yet. +/// ---------------------------------------------------------------------------- + +class HistoryEntry; +class PartitionSpec; +class Snapshot; +class SortOrder; +class StructLike; +class TableMetadata; + +class MetadataUpdate; +class UpdateRequirement; + +class AppendFiles; +class TableScan; + } // namespace iceberg diff --git a/test/core/core_unittest.cc b/test/core/core_unittest.cc index 501f73df0..ddd9d509a 100644 --- a/test/core/core_unittest.cc +++ b/test/core/core_unittest.cc @@ -19,9 +19,9 @@ #include -#include "iceberg/demo_table.h" +#include "iceberg/demo.h" TEST(TableTest, TestTableCons) { - auto table = iceberg::DemoTable(); - EXPECT_EQ(table.print(), "DemoTable"); + auto table = iceberg::Demo(); + EXPECT_EQ(table.print(), "Demo"); }