Skip to content

Commit 0279fdc

Browse files
lishuxu and shuxu.li authored
feat: RegisterTable support for InMemoryCatalog (#142)
Note: Since the LoadTable interface needs to return a Table object that holds a shared_from_this pointer to the catalog, I removed the InMemoryCatalogImpl indirection and implemented the Catalog interface directly in InMemoryCatalog. --------- Co-authored-by: shuxu.li <[email protected]>
1 parent 84565b5 commit 0279fdc

File tree

5 files changed: +126 / -196 lines changed

src/iceberg/catalog.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,7 @@ class ICEBERG_EXPORT Catalog {
166166
/// \param identifier a table identifier
167167
/// \return instance of Table implementation referred to by identifier or
168168
/// ErrorKind::kNoSuchTable if the table does not exist
169-
virtual Result<std::shared_ptr<Table>> LoadTable(
170-
const TableIdentifier& identifier) const = 0;
169+
virtual Result<std::unique_ptr<Table>> LoadTable(const TableIdentifier& identifier) = 0;
171170

172171
/// \brief Register a table with the catalog if it does not exist
173172
///

src/iceberg/catalog/in_memory_catalog.cc

Lines changed: 45 additions & 182 deletions
Original file line number | Diff line number | Diff line change
@@ -21,17 +21,14 @@
2121

2222
#include <algorithm>
2323
#include <iterator> // IWYU pragma: keep
24-
#include <mutex>
25-
#include <unordered_map>
2624

2725
#include "iceberg/exception.h"
2826
#include "iceberg/table.h"
27+
#include "iceberg/table_metadata.h"
2928
#include "iceberg/util/macros.h"
3029

3130
namespace iceberg {
3231

33-
namespace {
34-
3532
/// \brief A hierarchical namespace that manages namespaces and table metadata in-memory.
3633
///
3734
/// Each InMemoryNamespace represents a namespace level and can contain properties,
@@ -317,117 +314,56 @@ Result<std::string> InMemoryNamespace::GetTableMetadataLocation(
317314
return it->second;
318315
}
319316

320-
} // namespace
321-
322-
class ICEBERG_EXPORT InMemoryCatalogImpl {
323-
public:
324-
InMemoryCatalogImpl(std::string name, std::shared_ptr<FileIO> file_io,
325-
std::string warehouse_location,
326-
std::unordered_map<std::string, std::string> properties);
327-
328-
std::string_view name() const;
329-
330-
Status CreateNamespace(const Namespace& ns,
331-
const std::unordered_map<std::string, std::string>& properties);
332-
333-
Result<std::vector<Namespace>> ListNamespaces(const Namespace& ns) const;
334-
335-
Status DropNamespace(const Namespace& ns);
336-
337-
Result<bool> NamespaceExists(const Namespace& ns) const;
338-
339-
Result<std::unordered_map<std::string, std::string>> GetNamespaceProperties(
340-
const Namespace& ns) const;
341-
342-
Status UpdateNamespaceProperties(
343-
const Namespace& ns, const std::unordered_map<std::string, std::string>& updates,
344-
const std::unordered_set<std::string>& removals);
345-
346-
Result<std::vector<TableIdentifier>> ListTables(const Namespace& ns) const;
347-
348-
Result<std::unique_ptr<Table>> CreateTable(
349-
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
350-
const std::string& location,
351-
const std::unordered_map<std::string, std::string>& properties);
352-
353-
Result<std::unique_ptr<Table>> UpdateTable(
354-
const TableIdentifier& identifier,
355-
const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
356-
const std::vector<std::unique_ptr<MetadataUpdate>>& updates);
357-
358-
Result<std::shared_ptr<Transaction>> StageCreateTable(
359-
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
360-
const std::string& location,
361-
const std::unordered_map<std::string, std::string>& properties);
362-
363-
Result<bool> TableExists(const TableIdentifier& identifier) const;
364-
365-
Status DropTable(const TableIdentifier& identifier, bool purge);
366-
367-
Result<std::shared_ptr<Table>> LoadTable(const TableIdentifier& identifier) const;
368-
369-
Result<std::shared_ptr<Table>> RegisterTable(const TableIdentifier& identifier,
370-
const std::string& metadata_file_location);
371-
372-
std::unique_ptr<TableBuilder> BuildTable(const TableIdentifier& identifier,
373-
const Schema& schema) const;
374-
375-
private:
376-
std::string catalog_name_;
377-
std::unordered_map<std::string, std::string> properties_;
378-
std::shared_ptr<FileIO> file_io_;
379-
std::string warehouse_location_;
380-
std::unique_ptr<class InMemoryNamespace> root_namespace_;
381-
mutable std::recursive_mutex mutex_;
382-
};
383-
384-
InMemoryCatalogImpl::InMemoryCatalogImpl(
385-
std::string name, std::shared_ptr<FileIO> file_io, std::string warehouse_location,
386-
std::unordered_map<std::string, std::string> properties)
317+
InMemoryCatalog::InMemoryCatalog(
318+
std::string const& name, std::shared_ptr<FileIO> const& file_io,
319+
std::string const& warehouse_location,
320+
std::unordered_map<std::string, std::string> const& properties)
387321
: catalog_name_(std::move(name)),
388322
properties_(std::move(properties)),
389323
file_io_(std::move(file_io)),
390324
warehouse_location_(std::move(warehouse_location)),
391325
root_namespace_(std::make_unique<InMemoryNamespace>()) {}
392326

393-
std::string_view InMemoryCatalogImpl::name() const { return catalog_name_; }
327+
InMemoryCatalog::~InMemoryCatalog() = default;
328+
329+
std::string_view InMemoryCatalog::name() const { return catalog_name_; }
394330

395-
Status InMemoryCatalogImpl::CreateNamespace(
331+
Status InMemoryCatalog::CreateNamespace(
396332
const Namespace& ns, const std::unordered_map<std::string, std::string>& properties) {
397333
std::unique_lock lock(mutex_);
398334
return root_namespace_->CreateNamespace(ns, properties);
399335
}
400336

401-
Result<std::vector<Namespace>> InMemoryCatalogImpl::ListNamespaces(
337+
Result<std::unordered_map<std::string, std::string>>
338+
InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const {
339+
std::unique_lock lock(mutex_);
340+
return root_namespace_->GetProperties(ns);
341+
}
342+
343+
Result<std::vector<Namespace>> InMemoryCatalog::ListNamespaces(
402344
const Namespace& ns) const {
403345
std::unique_lock lock(mutex_);
404346
return root_namespace_->ListNamespaces(ns);
405347
}
406348

407-
Status InMemoryCatalogImpl::DropNamespace(const Namespace& ns) {
349+
Status InMemoryCatalog::DropNamespace(const Namespace& ns) {
408350
std::unique_lock lock(mutex_);
409351
return root_namespace_->DropNamespace(ns);
410352
}
411353

412-
Result<bool> InMemoryCatalogImpl::NamespaceExists(const Namespace& ns) const {
354+
Result<bool> InMemoryCatalog::NamespaceExists(const Namespace& ns) const {
413355
std::unique_lock lock(mutex_);
414356
return root_namespace_->NamespaceExists(ns);
415357
}
416358

417-
Result<std::unordered_map<std::string, std::string>>
418-
InMemoryCatalogImpl::GetNamespaceProperties(const Namespace& ns) const {
419-
std::unique_lock lock(mutex_);
420-
return root_namespace_->GetProperties(ns);
421-
}
422-
423-
Status InMemoryCatalogImpl::UpdateNamespaceProperties(
359+
Status InMemoryCatalog::UpdateNamespaceProperties(
424360
const Namespace& ns, const std::unordered_map<std::string, std::string>& updates,
425361
const std::unordered_set<std::string>& removals) {
426362
std::unique_lock lock(mutex_);
427363
return root_namespace_->UpdateNamespaceProperties(ns, updates, removals);
428364
}
429365

430-
Result<std::vector<TableIdentifier>> InMemoryCatalogImpl::ListTables(
366+
Result<std::vector<TableIdentifier>> InMemoryCatalog::ListTables(
431367
const Namespace& ns) const {
432368
std::unique_lock lock(mutex_);
433369
const auto& table_names = root_namespace_->ListTables(ns);
@@ -440,44 +376,60 @@ Result<std::vector<TableIdentifier>> InMemoryCatalogImpl::ListTables(
440376
return table_idents;
441377
}
442378

443-
Result<std::unique_ptr<Table>> InMemoryCatalogImpl::CreateTable(
379+
Result<std::unique_ptr<Table>> InMemoryCatalog::CreateTable(
444380
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
445381
const std::string& location,
446382
const std::unordered_map<std::string, std::string>& properties) {
447383
return NotImplemented("create table");
448384
}
449385

450-
Result<std::unique_ptr<Table>> InMemoryCatalogImpl::UpdateTable(
386+
Result<std::unique_ptr<Table>> InMemoryCatalog::UpdateTable(
451387
const TableIdentifier& identifier,
452388
const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
453389
const std::vector<std::unique_ptr<MetadataUpdate>>& updates) {
454390
return NotImplemented("update table");
455391
}
456392

457-
Result<std::shared_ptr<Transaction>> InMemoryCatalogImpl::StageCreateTable(
393+
Result<std::shared_ptr<Transaction>> InMemoryCatalog::StageCreateTable(
458394
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
459395
const std::string& location,
460396
const std::unordered_map<std::string, std::string>& properties) {
461397
return NotImplemented("stage create table");
462398
}
463399

464-
Result<bool> InMemoryCatalogImpl::TableExists(const TableIdentifier& identifier) const {
400+
Result<bool> InMemoryCatalog::TableExists(const TableIdentifier& identifier) const {
465401
std::unique_lock lock(mutex_);
466402
return root_namespace_->TableExists(identifier);
467403
}
468404

469-
Status InMemoryCatalogImpl::DropTable(const TableIdentifier& identifier, bool purge) {
405+
Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) {
470406
std::unique_lock lock(mutex_);
471407
// TODO(Guotao): Delete all metadata files if purge is true.
472408
return root_namespace_->UnregisterTable(identifier);
473409
}
474410

475-
Result<std::shared_ptr<Table>> InMemoryCatalogImpl::LoadTable(
476-
const TableIdentifier& identifier) const {
477-
return NotImplemented("load table");
411+
Result<std::unique_ptr<Table>> InMemoryCatalog::LoadTable(
412+
const TableIdentifier& identifier) {
413+
if (!file_io_) [[unlikely]] {
414+
return InvalidArgument("file_io is not set for catalog {}", catalog_name_);
415+
}
416+
417+
Result<std::string> metadata_location;
418+
{
419+
std::unique_lock lock(mutex_);
420+
ICEBERG_ASSIGN_OR_RAISE(metadata_location,
421+
root_namespace_->GetTableMetadataLocation(identifier));
422+
}
423+
424+
ICEBERG_ASSIGN_OR_RAISE(auto metadata,
425+
TableMetadataUtil::Read(*file_io_, metadata_location.value()));
426+
427+
return std::make_unique<Table>(identifier, std::move(metadata),
428+
metadata_location.value(), file_io_,
429+
std::static_pointer_cast<Catalog>(shared_from_this()));
478430
}
479431

480-
Result<std::shared_ptr<Table>> InMemoryCatalogImpl::RegisterTable(
432+
Result<std::shared_ptr<Table>> InMemoryCatalog::RegisterTable(
481433
const TableIdentifier& identifier, const std::string& metadata_file_location) {
482434
std::unique_lock lock(mutex_);
483435
if (!root_namespace_->NamespaceExists(identifier.ns)) {
@@ -489,95 +441,6 @@ Result<std::shared_ptr<Table>> InMemoryCatalogImpl::RegisterTable(
489441
return LoadTable(identifier);
490442
}
491443

492-
std::unique_ptr<TableBuilder> InMemoryCatalogImpl::BuildTable(
493-
const TableIdentifier& identifier, const Schema& schema) const {
494-
throw IcebergError("not implemented");
495-
}
496-
497-
InMemoryCatalog::InMemoryCatalog(
498-
std::string const& name, std::shared_ptr<FileIO> const& file_io,
499-
std::string const& warehouse_location,
500-
std::unordered_map<std::string, std::string> const& properties)
501-
: impl_(std::make_unique<InMemoryCatalogImpl>(name, file_io, warehouse_location,
502-
properties)) {}
503-
504-
InMemoryCatalog::~InMemoryCatalog() = default;
505-
506-
std::string_view InMemoryCatalog::name() const { return impl_->name(); }
507-
508-
Status InMemoryCatalog::CreateNamespace(
509-
const Namespace& ns, const std::unordered_map<std::string, std::string>& properties) {
510-
return impl_->CreateNamespace(ns, properties);
511-
}
512-
513-
Result<std::unordered_map<std::string, std::string>>
514-
InMemoryCatalog::GetNamespaceProperties(const Namespace& ns) const {
515-
return impl_->GetNamespaceProperties(ns);
516-
}
517-
518-
Result<std::vector<Namespace>> InMemoryCatalog::ListNamespaces(
519-
const Namespace& ns) const {
520-
return impl_->ListNamespaces(ns);
521-
}
522-
523-
Status InMemoryCatalog::DropNamespace(const Namespace& ns) {
524-
return impl_->DropNamespace(ns);
525-
}
526-
527-
Result<bool> InMemoryCatalog::NamespaceExists(const Namespace& ns) const {
528-
return impl_->NamespaceExists(ns);
529-
}
530-
531-
Status InMemoryCatalog::UpdateNamespaceProperties(
532-
const Namespace& ns, const std::unordered_map<std::string, std::string>& updates,
533-
const std::unordered_set<std::string>& removals) {
534-
return impl_->UpdateNamespaceProperties(ns, updates, removals);
535-
}
536-
537-
Result<std::vector<TableIdentifier>> InMemoryCatalog::ListTables(
538-
const Namespace& ns) const {
539-
return impl_->ListTables(ns);
540-
}
541-
542-
Result<std::unique_ptr<Table>> InMemoryCatalog::CreateTable(
543-
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
544-
const std::string& location,
545-
const std::unordered_map<std::string, std::string>& properties) {
546-
return impl_->CreateTable(identifier, schema, spec, location, properties);
547-
}
548-
549-
Result<std::unique_ptr<Table>> InMemoryCatalog::UpdateTable(
550-
const TableIdentifier& identifier,
551-
const std::vector<std::unique_ptr<UpdateRequirement>>& requirements,
552-
const std::vector<std::unique_ptr<MetadataUpdate>>& updates) {
553-
return impl_->UpdateTable(identifier, requirements, updates);
554-
}
555-
556-
Result<std::shared_ptr<Transaction>> InMemoryCatalog::StageCreateTable(
557-
const TableIdentifier& identifier, const Schema& schema, const PartitionSpec& spec,
558-
const std::string& location,
559-
const std::unordered_map<std::string, std::string>& properties) {
560-
return impl_->StageCreateTable(identifier, schema, spec, location, properties);
561-
}
562-
563-
Result<bool> InMemoryCatalog::TableExists(const TableIdentifier& identifier) const {
564-
return impl_->TableExists(identifier);
565-
}
566-
567-
Status InMemoryCatalog::DropTable(const TableIdentifier& identifier, bool purge) {
568-
return impl_->DropTable(identifier, purge);
569-
}
570-
571-
Result<std::shared_ptr<Table>> InMemoryCatalog::LoadTable(
572-
const TableIdentifier& identifier) const {
573-
return impl_->LoadTable(identifier);
574-
}
575-
576-
Result<std::shared_ptr<Table>> InMemoryCatalog::RegisterTable(
577-
const TableIdentifier& identifier, const std::string& metadata_file_location) {
578-
return impl_->RegisterTable(identifier, metadata_file_location);
579-
}
580-
581444
std::unique_ptr<TableBuilder> InMemoryCatalog::BuildTable(
582445
const TableIdentifier& identifier, const Schema& schema) const {
583446
throw IcebergError("not implemented");

src/iceberg/catalog/in_memory_catalog.h

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,12 @@
1919

2020
#pragma once
2121

22+
#include <mutex>
23+
2224
#include "iceberg/catalog.h"
2325

2426
namespace iceberg {
27+
2528
/**
2629
* @brief An in-memory implementation of the Iceberg Catalog interface.
2730
*
@@ -32,7 +35,9 @@ namespace iceberg {
3235
* @note This class is **not** suitable for production use.
3336
* All data will be lost when the process exits.
3437
*/
35-
class ICEBERG_EXPORT InMemoryCatalog : public Catalog {
38+
class ICEBERG_EXPORT InMemoryCatalog
39+
: public Catalog,
40+
public std::enable_shared_from_this<InMemoryCatalog> {
3641
public:
3742
InMemoryCatalog(std::string const& name, std::shared_ptr<FileIO> const& file_io,
3843
std::string const& warehouse_location,
@@ -79,8 +84,7 @@ class ICEBERG_EXPORT InMemoryCatalog : public Catalog {
7984

8085
Status DropTable(const TableIdentifier& identifier, bool purge) override;
8186

82-
Result<std::shared_ptr<Table>> LoadTable(
83-
const TableIdentifier& identifier) const override;
87+
Result<std::unique_ptr<Table>> LoadTable(const TableIdentifier& identifier) override;
8488

8589
Result<std::shared_ptr<Table>> RegisterTable(
8690
const TableIdentifier& identifier,
@@ -90,7 +94,12 @@ class ICEBERG_EXPORT InMemoryCatalog : public Catalog {
9094
const Schema& schema) const override;
9195

9296
private:
93-
std::unique_ptr<class InMemoryCatalogImpl> impl_;
97+
std::string catalog_name_;
98+
std::unordered_map<std::string, std::string> properties_;
99+
std::shared_ptr<FileIO> file_io_;
100+
std::string warehouse_location_;
101+
std::unique_ptr<class InMemoryNamespace> root_namespace_;
102+
mutable std::recursive_mutex mutex_;
94103
};
95104

96105
} // namespace iceberg

0 commit comments

Comments
 (0)