diff --git a/example/demo_example.cc b/example/demo_example.cc index 6f2cb1380..d20f48c0f 100644 --- a/example/demo_example.cc +++ b/example/demo_example.cc @@ -19,18 +19,63 @@ #include +#include "iceberg/arrow/arrow_file_io.h" #include "iceberg/avro/avro_register.h" -#include "iceberg/file_reader.h" +#include "iceberg/catalog/in_memory_catalog.h" #include "iceberg/parquet/parquet_register.h" +#include "iceberg/table.h" +#include "iceberg/table_scan.h" + +int main(int argc, char** argv) { + if (argc != 4) { + std::cerr << "Usage: " << argv[0] + << " " << std::endl; + return 0; + } + + const std::string warehouse_location = argv[1]; + const std::string table_name = argv[2]; + const std::string table_location = argv[3]; + const std::unordered_map properties; -int main() { iceberg::avro::RegisterAll(); iceberg::parquet::RegisterAll(); - auto open_result = iceberg::ReaderFactoryRegistry::Open( - iceberg::FileFormatType::kAvro, {.path = "non-existing-file.avro"}); - if (!open_result.has_value()) { - std::cerr << "Failed to open avro file" << std::endl; + + auto catalog = iceberg::InMemoryCatalog::Make("test", iceberg::arrow::MakeLocalFileIO(), + warehouse_location, properties); + + auto register_result = catalog->RegisterTable({.name = table_name}, table_location); + if (!register_result.has_value()) { + std::cerr << "Failed to register table: " << register_result.error().message + << std::endl; return 1; } + + auto load_result = catalog->LoadTable({.name = table_name}); + if (!load_result.has_value()) { + std::cerr << "Failed to load table: " << load_result.error().message << std::endl; + return 1; + } + + auto table = std::move(load_result.value()); + auto scan_result = table->NewScan()->Build(); + if (!scan_result.has_value()) { + std::cerr << "Failed to build scan: " << scan_result.error().message << std::endl; + return 1; + } + + auto scan = std::move(scan_result.value()); + auto plan_result = scan->PlanFiles(); + if (!plan_result.has_value()) { + std::cerr << "Failed to plan files: " << plan_result.error().message << std::endl; + return 1; + } + + std::cout << "Scan tasks: " << std::endl; + auto scan_tasks = std::move(plan_result.value()); + for (const auto& scan_task : scan_tasks) { + std::cout << " - " << scan_task->data_file()->file_path << std::endl; + } + return 0; } diff --git a/src/iceberg/avro.h b/src/iceberg/arrow/arrow_file_io.h similarity index 74% rename from src/iceberg/avro.h rename to src/iceberg/arrow/arrow_file_io.h index 9bd016017..12a9b2303 100644 --- a/src/iceberg/avro.h +++ b/src/iceberg/arrow/arrow_file_io.h @@ -19,16 +19,15 @@ #pragma once -#include +#include -#include "iceberg/iceberg_export.h" +#include "iceberg/file_io.h" +#include "iceberg/iceberg_bundle_export.h" -namespace iceberg { +namespace iceberg::arrow { -class ICEBERG_EXPORT Avro { - public: - virtual ~Avro() = default; - virtual std::string print() const = 0; -}; +ICEBERG_BUNDLE_EXPORT std::unique_ptr MakeMockFileIO(); -} // namespace iceberg +ICEBERG_BUNDLE_EXPORT std::unique_ptr MakeLocalFileIO(); + +} // namespace iceberg::arrow diff --git a/src/iceberg/arrow/arrow_fs_file_io.cc b/src/iceberg/arrow/arrow_fs_file_io.cc index 251455df5..35e0d4eee 100644 --- a/src/iceberg/arrow/arrow_fs_file_io.cc +++ b/src/iceberg/arrow/arrow_fs_file_io.cc @@ -23,6 +23,7 @@ #include #include "iceberg/arrow/arrow_error_transform_internal.h" +#include "iceberg/arrow/arrow_file_io.h" #include "iceberg/arrow/arrow_fs_file_io_internal.h" namespace iceberg::arrow { @@ -80,4 +81,12 @@ std::unique_ptr ArrowFileSystemFileIO::MakeLocalFileIO() { std::make_shared<::arrow::fs::LocalFileSystem>()); } +std::unique_ptr MakeMockFileIO() { + return ArrowFileSystemFileIO::MakeMockFileIO(); +} + +std::unique_ptr MakeLocalFileIO() { + return ArrowFileSystemFileIO::MakeLocalFileIO(); +} + } // namespace iceberg::arrow diff --git a/src/iceberg/avro/constants.h b/src/iceberg/avro/avro_constants.h similarity index 100% rename from src/iceberg/avro/constants.h rename to src/iceberg/avro/avro_constants.h diff --git a/src/iceberg/avro/avro_schema_util.cc b/src/iceberg/avro/avro_schema_util.cc index d411b4ecb..fd40cc7fc 100644 --- a/src/iceberg/avro/avro_schema_util.cc +++ b/src/iceberg/avro/avro_schema_util.cc @@ -30,9 +30,9 @@ #include #include +#include "iceberg/avro/avro_constants.h" #include "iceberg/avro/avro_register.h" #include "iceberg/avro/avro_schema_util_internal.h" -#include "iceberg/avro/constants.h" #include "iceberg/metadata_columns.h" #include "iceberg/name_mapping.h" #include "iceberg/schema.h" diff --git a/src/iceberg/catalog/in_memory_catalog.cc b/src/iceberg/catalog/in_memory_catalog.cc index 6c34d3adf..0215b9087 100644 --- a/src/iceberg/catalog/in_memory_catalog.cc +++ b/src/iceberg/catalog/in_memory_catalog.cc @@ -314,6 +314,13 @@ Result InMemoryNamespace::GetTableMetadataLocation( return it->second; } +std::shared_ptr InMemoryCatalog::Make( + std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties) { + return std::make_shared(name, file_io, warehouse_location, properties); +} + InMemoryCatalog::InMemoryCatalog( std::string const& name, std::shared_ptr const& file_io, std::string const& warehouse_location, diff --git a/src/iceberg/catalog/in_memory_catalog.h b/src/iceberg/catalog/in_memory_catalog.h index cd5011797..bde97bae2 100644 --- a/src/iceberg/catalog/in_memory_catalog.h +++ b/src/iceberg/catalog/in_memory_catalog.h @@ -44,6 +44,11 @@ class ICEBERG_EXPORT InMemoryCatalog std::unordered_map const& properties); ~InMemoryCatalog() override; + static std::shared_ptr Make( + std::string const& name, std::shared_ptr const& file_io, + std::string const& warehouse_location, + std::unordered_map const& properties); + std::string_view name() const override; Status CreateNamespace( diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index 4484ad963..ad470a22e 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -745,9 +745,9 @@ Result> SnapshotFromJson(const nlohmann::json& json) { std::unordered_map summary; if (summary_json.has_value()) { for (const auto& [key, value] : summary_json->items()) { - if (!kValidSnapshotSummaryFields.contains(key)) { - return JsonParseError("Invalid snapshot summary field: {}", key); - } + // if (!kValidSnapshotSummaryFields.contains(key)) { + // return JsonParseError("Invalid snapshot summary field: {}", key); + // } if (!value.is_string()) { return JsonParseError("Invalid snapshot summary field value: {}", SafeDumpJson(value)); diff --git a/test/json_internal_test.cc b/test/json_internal_test.cc index 69ed10bb6..3a557ed20 100644 --- a/test/json_internal_test.cc +++ b/test/json_internal_test.cc @@ -225,7 +225,9 @@ TEST(JsonInternalTest, Snapshot) { TestJsonConversion(snapshot, expected_json); } -TEST(JsonInternalTest, SnapshotFromJsonWithInvalidSummary) { +// FIXME: disable it for now since Iceberg Spark plugin generates +// custom summary keys. +TEST(JsonInternalTest, DISABLED_SnapshotFromJsonWithInvalidSummary) { nlohmann::json invalid_json = R"({"snapshot-id":1234567890, "parent-snapshot-id":9876543210,