diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..ddff4407b --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C", "target-cpu=native"] diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8a24dd07..36cf67320 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,27 +10,20 @@ jobs: format: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with rustfmt + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + components: rustfmt - name: format run: cargo fmt -- --check + msrv: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-msrv shell: bash run: | @@ -46,11 +39,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: Swatinem/rust-cache@v2 - name: Install cargo-msrv shell: bash @@ -74,16 +63,34 @@ jobs: env: RUSTDOCFLAGS: -D warnings steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: build docs run: cargo doc + + + # When we run cargo { build, clippy } --no-default-features, we want to build/lint the kernel to + # ensure that we can build the kernel without any features enabled. Unfortunately, due to how + # cargo resolves features, if we have a workspace member that depends on the kernel with features + # enabled, the kernel will be compiled with those features (even if we specify + # --no-default-features). + # + # To cope with this, we split build/clippy --no-default-features into two runs: + # 1. build/clippy all packages that depend on the kernel with some features enabled: + # - acceptance + # - test_utils + # - feature_tests + # (and examples) + # - inspect-table + # - read-table-changes + # - read-table-multi-threaded + # - read-table-single-threaded + # 2. 
build/clippy all packages that only have no-feature kernel dependency + # - delta_kernel + # - delta_kernel_derive + # - delta_kernel_ffi + # - delta_kernel_ffi_macros build: runs-on: ${{ matrix.os }} strategy: @@ -93,20 +100,17 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with clippy + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 - - name: check kernel builds with no-default-features - run: cargo build -p delta_kernel --no-default-features + components: clippy - name: build and lint with clippy run: cargo clippy --benches --tests --all-features -- -D warnings - - name: lint without default features - run: cargo clippy --no-default-features -- -D warnings + - name: lint without default features - packages which depend on kernel with features enabled + run: cargo clippy --workspace --no-default-features --exclude delta_kernel --exclude delta_kernel_ffi --exclude delta_kernel_derive --exclude delta_kernel_ffi_macros -- -D warnings + - name: lint without default features - packages which don't depend on kernel with features enabled + run: cargo clippy --no-default-features --package delta_kernel --package delta_kernel_ffi --package delta_kernel_derive --package delta_kernel_ffi_macros -- -D warnings - name: check kernel builds with default-engine run: cargo build -p feature_tests --features default-engine - name: check kernel builds with default-engine-rustls @@ -120,14 +124,9 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: test run: cargo test --workspace --verbose --all-features -- --skip read_table_version_hdfs @@ -220,14 +219,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install rust - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - - uses: Swatinem/rust-cache@v2 - name: Generate code coverage run: cargo llvm-cov --all-features --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs - name: Upload coverage to Codecov diff --git a/.github/workflows/run_integration_test.yml b/.github/workflows/run_integration_test.yml index 1ff681cf6..73ffd599c 100644 --- a/.github/workflows/run_integration_test.yml +++ b/.github/workflows/run_integration_test.yml @@ -18,17 +18,11 @@ jobs: - name: Skip job for pull requests on Windows if: ${{ matrix.skip }} run: echo "Skipping job for pull requests on Windows." 
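To illustrate the Cargo feature-unification problem described in the `build.yml` comment above (the reason the `--no-default-features` lint is split into two clippy runs), here is a minimal, hypothetical sketch. Only the `default-engine` feature name and the crate names are taken from this PR; the module and function names are made up for illustration.

```rust
// lib.rs of a crate that declares a `default-engine` cargo feature (hypothetical layout).
//
// If any workspace member depends on delta_kernel with `features = ["default-engine"]`,
// a workspace-wide `cargo clippy --workspace --no-default-features` still compiles and
// lints delta_kernel with that feature enabled, because Cargo unifies features across
// the whole invocation. Only the second CI run, which targets the kernel crates directly
// (`cargo clippy --no-default-features --package delta_kernel ...`), checks that the
// crate builds with the cfg below turned off.

#[cfg(feature = "default-engine")]
pub mod default_engine {
    // Always linted by the workspace-wide run as long as some sibling crate
    // turns the feature on.
    pub fn engine_only_entry_point() {}
}

#[cfg(not(feature = "default-engine"))]
pub fn no_feature_fallback() {
    // This path is only exercised by the kernel-only, no-default-features run.
}
```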
- - uses: actions/checkout@v3 + - uses: actions/checkout@v4 if: ${{ !matrix.skip }} - - name: Install minimal stable rust - if: ${{ !matrix.skip }} - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + - name: Setup rust toolchain if: ${{ !matrix.skip }} + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Run integration tests if: ${{ !matrix.skip }} shell: bash diff --git a/.github/workflows/semver-checks.yml b/.github/workflows/semver-checks.yml index 3f0374452..7bd39b167 100644 --- a/.github/workflows/semver-checks.yml +++ b/.github/workflows/semver-checks.yml @@ -25,12 +25,7 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-semver-checks shell: bash run: | diff --git a/.gitignore b/.gitignore index ba9bf6241..b057be7f8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ .zed # Rust -.cargo/ target/ integration-tests/Cargo.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d403a1fd..bf2228ebc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,239 @@ # Changelog +## [v0.9.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.9.0/) (2025-04-08) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.8.0...v0.9.0) + +### ๐Ÿ—๏ธ Breaking changes +1. Change `MetadataValue::Number(i32)` to `MetadataValue::Number(i64)` ([#733]) +2. Get prefix from offset path: `DefaultEngine::new` no longer requires a `table_root` parameter + and `list_from` consistently returns keys greater than the offset ([#699]) +3. Make `snapshot.schema()` return a `SchemaRef` ([#751]) +4. Make `visit_expression_internal` private, and `unwrap_kernel_expression` pub(crate) ([#767]) +5. Make actions types `pub(crate)` instead of `pub` ([#405]) +6. New `null_row` ExpressionHandler API ([#662]) +7. Rename enums `ReaderFeatures` -> `ReaderFeature` and `WriterFeatures` -> `WriterFeature` ([#802]) +8. Remove `get_` prefix from engine getters ([#804]) +9. Rename `FileSystemClient` to `StorageHandler` ([#805]) +10. Adopt types for table features (new `ReaderFeature::Unknown(String)` and + `WriterFeature::Unknown(String)`) ([#684]) +11. Renamed `ScanData` to `ScanMetadata` ([#817]) + - rename `ScanData` to `ScanMetadata` + - rename `Scan::scan_data()` to `Scan::scan_metadata()` + - (ffi) rename `free_kernel_scan_data()` to `free_scan_metadata_iter()` + - (ffi) rename `kernel_scan_data_next()` to `scan_metadata_next()` + - (ffi) rename `visit_scan_data()` to `visit_scan_metadata()` + - (ffi) rename `kernel_scan_data_init()` to `scan_metadata_iter_init()` + - (ffi) rename `KernelScanDataIterator` to `ScanMetadataIterator` + - (ffi) rename `SharedScanDataIterator` to `SharedScanMetadataIterator` +12. `ScanMetadata` is now a struct (instead of tuple) with new `FilteredEngineData` type ([#768]) + +### ๐Ÿš€ Features / new APIs + +1. (`v2Checkpoint`) Extract & insert sidecar batches in `replay`'s action iterator ([#679]) +2. Support the `v2Checkpoint` reader/writer feature ([#685]) +3. Add check for whether `appendOnly` table feature is supported or enabled ([#664]) +4. Add basic partition pruning support ([#713]) +5. Add `DeletionVectors` to supported writer features ([#735]) +6. Add writer version 2/invariant table feature support ([#734]) +7.
Improved pre-signed URL checks ([#760]) +8. Add `CheckpointMetadata` action ([#781]) +9. Add classic and uuid parquet checkpoint path generation ([#782]) +10. New `Snapshot::try_new_from()` API ([#549]) + +### ๐Ÿ› Bug Fixes + +1. Return `Error::unsupported` instead of panic in `Scalar::to_array(MapType)` ([#757]) +2. Remove 'default-members' in workspace, default to all crates ([#752]) +3. Update compilation error and clippy lints for rustc 1.86 ([#800]) + +### ๐Ÿšœ Refactor + +1. Split up `arrow_expression` module ([#750]) +2. Flatten deeply nested match statement ([#756]) +3. Simplify predicate evaluation by supporting inversion ([#761]) +4. Rename `LogSegment::replay` to `LogSegment::read_actions` ([#766]) +5. Extract deduplication logic from `AddRemoveDedupVisitor` into embeddable `FileActionsDeduplicator` ([#769]) +6. Move testing helper function to `test_utils` mod ([#794]) +7. Rename `_last_checkpoint` from `CheckpointMetadata` to `LastCheckpointHint` ([#789]) +8. Use ExpressionTransform instead of adhoc expression traversals ([#803]) +9. Extract log replay processing structure into `LogReplayProcessor` trait ([#774]) + +### ๐Ÿงช Testing + +1. Add V2 checkpoint read support integration tests ([#690]) + +### โš™๏ธ Chores/CI + +1. Use maintained action to setup rust toolchain ([#585]) + +### Other + +1. Update HDFS dependencies ([#689]) +2. Add .cargo/config.toml with native instruction codegen ([#772]) + + +[#679]: https://github.com/delta-io/delta-kernel-rs/pull/679 +[#685]: https://github.com/delta-io/delta-kernel-rs/pull/685 +[#689]: https://github.com/delta-io/delta-kernel-rs/pull/689 +[#664]: https://github.com/delta-io/delta-kernel-rs/pull/664 +[#690]: https://github.com/delta-io/delta-kernel-rs/pull/690 +[#713]: https://github.com/delta-io/delta-kernel-rs/pull/713 +[#735]: https://github.com/delta-io/delta-kernel-rs/pull/735 +[#734]: https://github.com/delta-io/delta-kernel-rs/pull/734 +[#733]: https://github.com/delta-io/delta-kernel-rs/pull/733 +[#585]: https://github.com/delta-io/delta-kernel-rs/pull/585 +[#750]: https://github.com/delta-io/delta-kernel-rs/pull/750 +[#756]: https://github.com/delta-io/delta-kernel-rs/pull/756 +[#757]: https://github.com/delta-io/delta-kernel-rs/pull/757 +[#699]: https://github.com/delta-io/delta-kernel-rs/pull/699 +[#752]: https://github.com/delta-io/delta-kernel-rs/pull/752 +[#751]: https://github.com/delta-io/delta-kernel-rs/pull/751 +[#761]: https://github.com/delta-io/delta-kernel-rs/pull/761 +[#760]: https://github.com/delta-io/delta-kernel-rs/pull/760 +[#766]: https://github.com/delta-io/delta-kernel-rs/pull/766 +[#767]: https://github.com/delta-io/delta-kernel-rs/pull/767 +[#405]: https://github.com/delta-io/delta-kernel-rs/pull/405 +[#772]: https://github.com/delta-io/delta-kernel-rs/pull/772 +[#662]: https://github.com/delta-io/delta-kernel-rs/pull/662 +[#769]: https://github.com/delta-io/delta-kernel-rs/pull/769 +[#794]: https://github.com/delta-io/delta-kernel-rs/pull/794 +[#781]: https://github.com/delta-io/delta-kernel-rs/pull/781 +[#789]: https://github.com/delta-io/delta-kernel-rs/pull/789 +[#800]: https://github.com/delta-io/delta-kernel-rs/pull/800 +[#802]: https://github.com/delta-io/delta-kernel-rs/pull/802 +[#803]: https://github.com/delta-io/delta-kernel-rs/pull/803 +[#774]: https://github.com/delta-io/delta-kernel-rs/pull/774 +[#804]: https://github.com/delta-io/delta-kernel-rs/pull/804 +[#782]: https://github.com/delta-io/delta-kernel-rs/pull/782 +[#805]: https://github.com/delta-io/delta-kernel-rs/pull/805 +[#549]: 
https://github.com/delta-io/delta-kernel-rs/pull/549 +[#684]: https://github.com/delta-io/delta-kernel-rs/pull/684 +[#817]: https://github.com/delta-io/delta-kernel-rs/pull/817 +[#768]: https://github.com/delta-io/delta-kernel-rs/pull/768 + + +## [v0.8.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.8.0/) (2025-03-04) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.7.0...v0.8.0) + +### ๐Ÿ—๏ธ Breaking changes + +1. ffi: `get_partition_column_count` and `get_partition_columns` now take a `Snapshot` instead of a + `Scan` ([#697]) +2. ffi: expression visitor callback `visit_literal_decimal` now takes `i64` for the upper half of a 128-bit int value ([#724]) +3. - `DefaultJsonHandler::with_readahead()` renamed to `DefaultJsonHandler::with_buffer_size()` ([#711]) +4. DefaultJsonHandler's defaults changed: + - default buffer size: 10 => 1000 requests/files + - default batch size: 1024 => 1000 rows +5. Bump MSRV to rustc 1.81 ([#725]) + +### ๐Ÿ› Bug Fixes + +1. Pin `chrono` version to fix arrow compilation failure ([#719]) + +### โšก Performance + +1. Replace default engine JSON reader's `FileStream` with concurrent futures ([#711]) + + +[#719]: https://github.com/delta-io/delta-kernel-rs/pull/719 +[#724]: https://github.com/delta-io/delta-kernel-rs/pull/724 +[#697]: https://github.com/delta-io/delta-kernel-rs/pull/697 +[#725]: https://github.com/delta-io/delta-kernel-rs/pull/725 +[#711]: https://github.com/delta-io/delta-kernel-rs/pull/711 + + +## [v0.7.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.7.0/) (2025-02-24) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.1...v0.7.0) + +### ๐Ÿ—๏ธ Breaking changes +1. Read transforms are now communicated via expressions ([#607], [#612], [#613], [#614]) This includes: + - `ScanData` now includes a third tuple field: a row-indexed vector of transforms to apply to the `EngineData`. + - Adds a new `scan::state::transform_to_logical` function that encapsulates the boilerplate of applying the transform expression + - Removes `scan_action_iter` API and `logical_to_physical` API + - Removes `column_mapping_mode` from `GlobalScanState` + - ffi: exposes methods to get an expression evaluator and evaluate an expression from c + - read-table example: Removes `add_partition_columns` in arrow.c + - read-table example: adds an `apply_transform` function in arrow.c +2. ffi: support field nullability in schema visitor ([#656]) +3. ffi: expose metadata in SchemaEngineVisitor ffi api ([#659]) +4. ffi: new `visit_schema` FFI now operates on a `Schema` instead of a `Snapshot` ([#683], [#709]) +5. Introduced feature flags (`arrow_54` and `arrow_53`) to select major arrow versions ([#654], [#708], [#717]) + +### ๐Ÿš€ Features / new APIs + +1. Read `partition_values` in `RemoveVisitor` and remove `break` in `RowVisitor` for `RemoveVisitor` ([#633]) +2. Add the in-commit timestamp field to CommitInfo ([#581]) +3. Support NOT and column expressions in eval_sql_where ([#653]) +4. Add check for schema read compatibility ([#554]) +5. Introduce `TableConfiguration` to jointly manage metadata, protocol, and table properties ([#644]) +6. Add visitor `SidecarVisitor` and `Sidecar` action struct ([#673]) +7. Add in-commit timestamps table properties ([#558]) +8. Support writing to writer version 1 ([#693]) +9. ffi: new `logical_schema` FFI to get the logical schema of a snapshot ([#709]) + +### ๐Ÿ› Bug Fixes + +1. Incomplete multi-part checkpoint handling when no hint is provided ([#641]) +2. 
Consistent PartialEq for Scalar ([#677]) +3. Cargo fmt does not handle mods defined in macros ([#676]) +4. Ensure properly nested null masks for parquet reads ([#692]) +5. Handle predicates on non-nullable columns without stats ([#700]) + +### ๐Ÿ“š Documentation + +1. Update readme to reflect tracing feature is needed for read-table ([#619]) +2. Clarify `JsonHandler` semantics on EngineData ordering ([#635]) + +### ๐Ÿšœ Refactor + +1. Make [non] nullable struct fields easier to create ([#646]) +2. Make eval_sql_where available to DefaultPredicateEvaluator ([#627]) + +### ๐Ÿงช Testing + +1. Port cdf tests from delta-spark to kernel ([#611]) + +### โš™๏ธ Chores/CI + +1. Fix some typos ([#643]) +2. Release script publishing fixes ([#638]) + +[#638]: https://github.com/delta-io/delta-kernel-rs/pull/638 +[#643]: https://github.com/delta-io/delta-kernel-rs/pull/643 +[#619]: https://github.com/delta-io/delta-kernel-rs/pull/619 +[#635]: https://github.com/delta-io/delta-kernel-rs/pull/635 +[#633]: https://github.com/delta-io/delta-kernel-rs/pull/633 +[#611]: https://github.com/delta-io/delta-kernel-rs/pull/611 +[#581]: https://github.com/delta-io/delta-kernel-rs/pull/581 +[#646]: https://github.com/delta-io/delta-kernel-rs/pull/646 +[#627]: https://github.com/delta-io/delta-kernel-rs/pull/627 +[#641]: https://github.com/delta-io/delta-kernel-rs/pull/641 +[#653]: https://github.com/delta-io/delta-kernel-rs/pull/653 +[#607]: https://github.com/delta-io/delta-kernel-rs/pull/607 +[#656]: https://github.com/delta-io/delta-kernel-rs/pull/656 +[#554]: https://github.com/delta-io/delta-kernel-rs/pull/554 +[#644]: https://github.com/delta-io/delta-kernel-rs/pull/644 +[#659]: https://github.com/delta-io/delta-kernel-rs/pull/659 +[#612]: https://github.com/delta-io/delta-kernel-rs/pull/612 +[#677]: https://github.com/delta-io/delta-kernel-rs/pull/677 +[#676]: https://github.com/delta-io/delta-kernel-rs/pull/676 +[#673]: https://github.com/delta-io/delta-kernel-rs/pull/673 +[#613]: https://github.com/delta-io/delta-kernel-rs/pull/613 +[#558]: https://github.com/delta-io/delta-kernel-rs/pull/558 +[#692]: https://github.com/delta-io/delta-kernel-rs/pull/692 +[#700]: https://github.com/delta-io/delta-kernel-rs/pull/700 +[#683]: https://github.com/delta-io/delta-kernel-rs/pull/683 +[#654]: https://github.com/delta-io/delta-kernel-rs/pull/654 +[#693]: https://github.com/delta-io/delta-kernel-rs/pull/693 +[#614]: https://github.com/delta-io/delta-kernel-rs/pull/614 +[#709]: https://github.com/delta-io/delta-kernel-rs/pull/709 +[#708]: https://github.com/delta-io/delta-kernel-rs/pull/708 +[#717]: https://github.com/delta-io/delta-kernel-rs/pull/717 + + ## [v0.6.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.6.1/) (2025-01-10) [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.0...v0.6.1) diff --git a/Cargo.lock b/Cargo.lock index 57beb9412..9d30a071f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,20 +4,14 @@ version = 3 [[package]] name = "acceptance" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-ord", - "arrow-schema", - "arrow-select", "datatest-stable", "delta_kernel", "flate2", "futures", "itertools 0.13.0", "object_store", - "parquet", "serde", "serde_json", "tar", @@ -59,16 +53,16 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = 
"5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.2.15", + "getrandom 0.3.3", "once_cell", "version_check", - "zerocopy 0.7.35", + "zerocopy", ] [[package]] @@ -162,55 +156,108 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arrow" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-csv 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-json 53.4.1", + "arrow-ord 53.4.1", + "arrow-row 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "arrow-string 53.4.1", +] + +[[package]] +name = "arrow" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1" +dependencies = [ + "arrow-arith 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-csv 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-json 54.2.1", + "arrow-ord 54.2.1", + "arrow-row 54.2.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", + "arrow-string 54.2.1", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "half", + "num", ] [[package]] name = "arrow-arith" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "num", ] [[package]] name = "arrow-array" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "chrono-tz", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-array" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + 
"arrow-schema 54.3.1", "chrono", "chrono-tz", "half", @@ -220,28 +267,60 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +checksum = "5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" dependencies = [ "bytes", "half", "num", ] +[[package]] +name = "arrow-buffer" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + [[package]] name = "arrow-cast" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "atoi", - "base64 0.22.1", + "base64", "chrono", "comfy-table", "half", @@ -252,13 +331,32 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65" +dependencies = [ + "arrow-array 54.2.1", + "arrow-cast 54.2.1", + "arrow-schema 54.3.1", "chrono", "csv", "csv-core", @@ -268,40 +366,86 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 53.4.1", + "arrow-schema 53.4.1", "half", "num", ] +[[package]] +name = "arrow-data" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" +dependencies = [ + "arrow-buffer 54.3.1", + "arrow-schema 54.3.1", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "flatbuffers", +] + [[package]] name = "arrow-ipc" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" +checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "flatbuffers", ] [[package]] name = "arrow-json" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-json" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "half", "indexmap", @@ -313,64 +457,133 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", ] [[package]] name = "arrow-row" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", "half", ] +[[package]] +name = "arrow-row" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "arrow-schema" -version = "54.2.0" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" +dependencies = [ + "bitflags 2.9.1", +] + +[[package]] +name = "arrow-select" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" dependencies = [ - "bitflags 2.8.0", + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "num", ] [[package]] name = "arrow-select" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "num", ] [[package]] name = "arrow-string" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "arrow-string" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "memchr", "num", "regex", @@ -379,13 +592,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -411,9 +624,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", @@ -424,12 +637,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -459,9 +666,9 @@ checksum = 
"bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "block-buffer" @@ -494,9 +701,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.2" +version = "4.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -510,9 +717,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytemuck" -version = "1.21.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" [[package]] name = "byteorder" @@ -522,9 +729,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "camino" @@ -543,9 +750,9 @@ dependencies = [ [[package]] name = "cbindgen" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb" +checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff" dependencies = [ "clap", "heck 0.4.1", @@ -555,16 +762,16 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.98", + "syn 2.0.101", "tempfile", "toml", ] [[package]] name = "cc" -version = "1.2.0" +version = "1.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" +checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" dependencies = [ "jobserver", "libc", @@ -600,9 +807,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" dependencies = [ "chrono", "chrono-tz-build", @@ -611,9 +818,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" dependencies = [ "parse-zoneinfo", "phf_codegen", @@ -631,9 +838,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.22" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69371e34337c4c984bbe322360c2547210bf632eb2814bbe78a6e87a2935bd2b" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" dependencies = [ 
"clap_builder", "clap_derive", @@ -641,9 +848,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.22" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e24c1b4099818523236a8ca881d2b45db98dadfb4625cf6608c12069fcbbde1" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" dependencies = [ "anstream", "anstyle", @@ -653,14 +860,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -700,7 +907,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] @@ -742,9 +949,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.2.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" dependencies = [ "crc-catalog", ] @@ -824,17 +1031,11 @@ dependencies = [ [[package]] name = "delta_kernel" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-json", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 53.4.1", + "arrow 54.2.1", + "async-trait", "bytes", "chrono", "delta_kernel", @@ -847,7 +1048,8 @@ dependencies = [ "indexmap", "itertools 0.13.0", "object_store", - "parquet", + "parquet 53.4.1", + "parquet 54.2.1", "paste", "reqwest", "roaring", @@ -873,20 +1075,17 @@ dependencies = [ [[package]] name = "delta_kernel_derive" -version = "0.6.1" +version = "0.9.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "delta_kernel_ffi" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", "cbindgen", "delta_kernel", "delta_kernel_ffi_macros", @@ -904,11 +1103,11 @@ dependencies = [ [[package]] name = "delta_kernel_ffi_macros" -version = "0.6.1" +version = "0.9.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -939,7 +1138,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -956,9 +1155,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encoding_rs" @@ -979,16 +1178,22 @@ dependencies = [ "regex", ] +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "env_logger" -version = "0.11.5" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] @@ -1000,9 +1205,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1033,7 +1238,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "feature_tests" -version = "0.6.1" +version = "0.9.0" dependencies = [ "delta_kernel", ] @@ -1082,9 +1287,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -1176,7 +1381,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -1211,21 +1416,21 @@ dependencies = [ [[package]] name = "g2gen" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc3e32f911a41e073b8492473c3595a043e1369ab319a2dbf8c89b1fea06457c" +checksum = "c5a7e0eb46f83a20260b850117d204366674e85d3a908d90865c78df9a6b1dfc" dependencies = [ "g2poly", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "g2p" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a9afa6efed9af3a5a68ba066429c1497c299d4eafbd948fe630df47a8f2d29f" +checksum = "539e2644c030d3bf4cd208cb842d2ce2f80e82e6e8472390bcef83ceba0d80ad" dependencies = [ "g2gen", "g2poly", @@ -1233,9 +1438,9 @@ dependencies = [ [[package]] name = "g2poly" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd8b261ccf00df8c5cc60c082bb7d7aa64c33a433cfcc091ca244326c924b2c" +checksum = "312d2295c7302019c395cfb90dacd00a82a2eabd700429bba9c7a3f38dbbe11b" [[package]] name = "generic-array" @@ -1249,9 +1454,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "js-sys", @@ -1262,14 +1467,16 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1286,9 +1493,9 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "h2" -version = 
"0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5" dependencies = [ "atomic-waker", "bytes", @@ -1305,9 +1512,9 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1316,19 +1523,19 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] name = "hdfs-native" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e72db0dfc43c1e6b7ef6d34f6d38eff079cbd30dbc18924b27108793e47893c" +checksum = "fe9a986a98854573dfbc130f42f81e92f6d4581e23060708842fede6edef0f1f" dependencies = [ "aes", - "base64 0.21.7", - "bitflags 2.8.0", + "base64", + "bitflags 2.9.1", "bytes", "cbc", "chrono", @@ -1353,7 +1560,7 @@ dependencies = [ "regex", "roxmltree", "socket2", - "thiserror 1.0.69", + "thiserror 2.0.12", "tokio", "url", "uuid", @@ -1363,9 +1570,9 @@ dependencies = [ [[package]] name = "hdfs-native-object-store" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d441ed4b31eceee8ffc10690ac2cbcab68d64c453238fa21ec4926f0dbb217" +checksum = "2f2ed9304bed0023daf452b8017efd2f663d7e693f8b7f29a54e6c1c3d9aefd9" dependencies = [ "async-trait", "bytes", @@ -1373,7 +1580,7 @@ dependencies = [ "futures", "hdfs-native", "object_store", - "thiserror 1.0.69", + "thiserror 2.0.12", "tokio", ] @@ -1415,9 +1622,9 @@ dependencies = [ [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1436,12 +1643,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", + "futures-core", "http", "http-body", "pin-project-lite", @@ -1449,15 +1656,15 @@ dependencies = [ [[package]] name = "httparse" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "hyper" @@ -1515,9 +1722,9 @@ 
dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710" dependencies = [ "bytes", "futures-channel", @@ -1525,6 +1732,7 @@ dependencies = [ "http", "http-body", "hyper", + "libc", "pin-project-lite", "socket2", "tokio", @@ -1534,14 +1742,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -1557,21 +1766,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", "litemap", @@ -1580,31 +1790,11 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", "icu_collections", @@ -1612,67 +1802,54 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", 
"icu_properties_data", "icu_provider", - "tinystr", + "potential_utf", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" dependencies = [ "displaydoc", - "icu_locid", - "icu_provider_macros", + "icu_locale_core", "stable_deref_trait", "tinystr", "writeable", "yoke", "zerofrom", + "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "idna" version = "1.0.3" @@ -1686,9 +1863,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -1696,9 +1873,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", @@ -1706,9 +1883,9 @@ dependencies = [ [[package]] name = "inout" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ "block-padding", "generic-array", @@ -1718,8 +1895,7 @@ dependencies = [ name = "inspect-table" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -1746,34 +1922,59 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itertools" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ + "getrandom 0.3.3", "libc", ] @@ -1859,25 +2060,25 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.53.0", ] [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" @@ -1885,7 +2086,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "libc", "redox_syscall", ] @@ -1904,15 +2105,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "litemap" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" @@ -1926,9 +2127,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lru-slab" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" @@ -1972,9 +2179,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name 
= "miniz_oxide" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -1992,9 +2199,9 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" dependencies = [ "libc", "log", @@ -2107,7 +2314,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "bytes", "chrono", "futures", @@ -2134,17 +2341,17 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", "libc", @@ -2161,7 +2368,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2172,9 +2379,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" dependencies = [ "cc", "libc", @@ -2222,19 +2429,55 @@ dependencies = [ [[package]] name = "parquet" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parquet" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb" +checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.22.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.3.1", 
+ "arrow-select 54.2.1", + "base64", "brotli", "bytes", "chrono", @@ -2330,33 +2573,57 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.7.35", + "zerocopy", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -2364,31 +2631,31 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "prost-types" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] [[package]] name = "quick-xml" -version = "0.37.2" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", "serde", @@ -2396,37 +2663,40 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum 
= "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", "rustc-hash", "rustls", "socket2", - "thiserror 2.0.11", + "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" dependencies = [ "bytes", - "getrandom 0.2.15", - "rand 0.8.5", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.1", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.11", + "thiserror 2.0.12", "tinyvec", "tracing", "web-time", @@ -2434,9 +2704,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" dependencies = [ "cfg_aliases", "libc", @@ -2448,13 +2718,19 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2468,13 +2744,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.1", - "zerocopy 0.8.18", + "rand_core 0.9.3", ] [[package]] @@ -2494,7 +2769,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.1", + "rand_core 0.9.3", ] [[package]] @@ -2503,26 +2778,22 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] name = "rand_core" -version = "0.9.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88e0da7a2c97baa202165137c158d0a2e824ac465d13d81046727b34cb247d3" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", - "zerocopy 0.8.18", + "getrandom 0.3.3", ] [[package]] name = "read-table-changes" version = "0.1.0" dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", "clap", "delta_kernel", "env_logger", @@ -2534,7 +2805,7 @@ dependencies = [ name = "read-table-multi-threaded" version = "0.1.0" dependencies = [ - "arrow", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -2547,7 +2818,7 @@ dependencies = [ name = "read-table-single-threaded" version = "0.1.0" 
dependencies = [ - "arrow", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -2557,11 +2828,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", ] [[package]] @@ -2610,11 +2881,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-core", @@ -2661,13 +2932,13 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.9" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", @@ -2675,9 +2946,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.10" +version = "0.10.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652edd001c53df0b3f96a36a8dc93fce6866988efc16808235653c6bcac8bf2" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" dependencies = [ "bytemuck", "byteorder", @@ -2685,12 +2956,9 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.18.1" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862340e351ce1b271a378ec53f304a5558f7db87f3769dc655a8f6ecbb68b302" -dependencies = [ - "xmlparser", -] +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" [[package]] name = "rustc-demangle" @@ -2715,11 +2983,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "errno", "libc", "linux-raw-sys", @@ -2728,9 +2996,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.23" +version = "0.23.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" dependencies = [ "log", "once_cell", @@ -2764,18 +3032,19 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ "web-time", + "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" dependencies = [ "ring", "rustls-pki-types", @@ -2784,15 +3053,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2824,7 +3093,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -2837,7 +3106,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.10.0", "core-foundation-sys", "libc", @@ -2856,15 +3125,15 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" @@ -2883,7 +3152,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2957,9 +3226,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "snafu" @@ -2976,10 +3245,10 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2990,9 +3259,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -3041,7 +3310,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3063,9 +3332,9 @@ dependencies = [ 
[[package]] name = "syn" -version = "2.0.98" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -3083,13 +3352,13 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3098,7 +3367,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -3115,9 +3384,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" dependencies = [ "filetime", "libc", @@ -3126,19 +3395,18 @@ dependencies = [ [[package]] name = "target-triple" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a4d50cdb458045afc8131fd91b64904da29548bcb63c7236e0844936c13078" +checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790" [[package]] name = "tempfile" -version = "3.17.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.3", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3171,7 +3439,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3182,7 +3450,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "test-case-core", ] @@ -3204,19 +3472,16 @@ checksum = "888d0c3c6db53c0fdab160d2ed5e12ba745383d3e85813f2ea0f2b1475ab553f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "test_utils" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-schema", "delta_kernel", "itertools 0.13.0", "object_store", - "parquet", ] [[package]] @@ -3230,11 +3495,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -3245,18 +3510,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3291,9 +3556,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", "zerovec", @@ -3301,9 +3566,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -3316,9 +3581,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.43.0" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +checksum = "2513ca694ef9ede0fb23fe71a4ee4107cb102b9dc1930f6d0fd77aae068ae165" dependencies = [ "backtrace", "bytes", @@ -3338,7 +3603,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3353,9 +3618,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ "rustls", "tokio", @@ -3363,9 +3628,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -3376,9 +3641,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.20" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", @@ -3388,26 +3653,33 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.24" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name 
= "toml_write" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" + [[package]] name = "tower" version = "0.5.2" @@ -3455,7 +3727,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3518,9 +3790,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "trybuild" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b812699e0c4f813b872b373a4471717d9eb550da14b311058a4d9cf4173cbca6" +checksum = "1c9bf9513a2f4aeef5fdac8677d7d349c79fdbcc03b9c86da6e9d254f1e43be2" dependencies = [ "glob", "serde", @@ -3549,9 +3821,9 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -3577,14 +3849,14 @@ version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ - "base64 0.22.1", + "base64", "flate2", "log", "once_cell", "rustls", "rustls-pki-types", "url", - "webpki-roots", + "webpki-roots 0.26.11", ] [[package]] @@ -3598,12 +3870,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -3618,12 +3884,12 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.13.2" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom 0.3.1", - "rand 0.9.0", + "getrandom 0.3.3", + "rand 0.9.1", ] [[package]] @@ -3652,7 +3918,7 @@ checksum = "d674d135b4a8c1d7e813e2f8d1c9a58308aee4a680323066025e53132218bd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3682,9 +3948,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -3717,7 +3983,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -3752,7 +4018,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3801,30 +4067,39 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.8" +version = "0.26.11" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.0", +] + +[[package]] +name = "webpki-roots" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2853738d1cc4f2da3a225c18ec6c3721abb31961096e9dbf5ab35fa88b19cfdb" dependencies = [ "rustls-pki-types", ] [[package]] name = "which" -version = "4.4.2" +version = "7.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" dependencies = [ "either", - "home", - "once_cell", + "env_home", "rustix", + "winsafe", ] [[package]] name = "whoami" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" +checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" dependencies = [ "redox_syscall", "wasite", @@ -3853,7 +4128,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -3864,41 +4139,81 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-registry" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", - "windows-strings", - "windows-targets 0.52.6", + "windows-strings 0.3.1", + "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - 
"windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result", - "windows-targets 0.52.6", + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", ] [[package]] @@ -3952,13 +4267,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3971,6 +4302,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3983,6 +4320,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3995,12 +4338,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -4013,6 +4368,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -4025,6 +4386,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4037,6 +4404,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -4049,58 +4422,57 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" -version = "0.7.2" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] [[package]] -name = "wit-bindgen-rt" -version = "0.33.0" +name = "winsafe" +version = "0.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" -dependencies = [ - "bitflags 2.8.0", -] +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" [[package]] -name = "write16" -version = "1.0.0" +name = "wit-bindgen-rt" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.9.1", +] [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "xattr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e105d177a3871454f754b33bb0ee637ecaaac997446375fd3e5d43a2ed00c909" +checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" dependencies = [ "libc", - "linux-raw-sys", "rustix", ] -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", @@ -4110,13 +4482,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "synstructure", ] @@ -4128,63 +4500,42 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", -] - -[[package]] -name = "zerocopy" -version = "0.8.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79386d31a42a4996e3336b0919ddb90f81112af416270cff95b5f5af22b839c2" -dependencies = [ - "zerocopy-derive 0.8.18", + "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76331675d372f91bf8d17e13afbd5fe639200b73d01f0fc748bb059f9cca2db7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "synstructure", ] @@ -4194,11 +4545,22 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ "yoke", "zerofrom", @@ -4207,20 +4569,20 @@ dependencies = [ [[package]] name = 
"zerovec-derive" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] diff --git a/Cargo.toml b/Cargo.toml index ec7993736..bcd69af76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,11 @@ members = [ "test-utils", "feature-tests", ] -# Only check / build main crates by default (check all with `--workspace`) -default-members = ["acceptance", "kernel"] +# note that in addition to the members above, the workspace includes examples: +# - inspect-table +# - read-table-changes +# - read-table-multi-threaded +# - read-table-single-threaded resolver = "2" [workspace.package] @@ -19,26 +22,11 @@ keywords = ["deltalake", "delta", "datalake"] license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" -rust-version = "1.80" -version = "0.6.1" +rust-version = "1.81" +version = "0.9.0" [workspace.dependencies] -# When changing the arrow version range, also modify ffi/Cargo.toml which has -# its own arrow version ranges witeh modified features. Failure to do so will -# result in compilation errors as two different sets of arrow dependencies may -# be sourced -arrow = { version = ">=53, <55" } -arrow-arith = { version = ">=53, <55" } -arrow-array = { version = ">=53, <55" } -arrow-buffer = { version = ">=53, <55" } -arrow-cast = { version = ">=53, <55" } -arrow-data = { version = ">=53, <55" } -arrow-ord = { version = ">=53, <55" } -arrow-json = { version = ">=53, <55" } -arrow-select = { version = ">=53, <55" } -arrow-schema = { version = ">=53, <55" } -parquet = { version = ">=53, <55", features = ["object_store"] } object_store = { version = ">=0.11, <0.12" } -hdfs-native-object-store = "0.12.0" -hdfs-native = "0.10.0" +hdfs-native-object-store = "0.13.0" +hdfs-native = "0.11.0" walkdir = "2.5.0" diff --git a/README.md b/README.md index 6e25a2ddb..d02a141e2 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default ```toml # fewer dependencies, requires consumer to implement Engine trait. # allows consumers to implement their own in-memory format -delta_kernel = "0.6.1" +delta_kernel = "0.9.0" # or turn on the default engine, based on arrow -delta_kernel = { version = "0.6.1", features = ["default-engine"] } +delta_kernel = { version = "0.9.0", features = ["default-engine"] } ``` ### Feature flags @@ -74,32 +74,19 @@ quickly. To enable engines that already integrate arrow to also integrate kernel to track a specific version of arrow that kernel depends on, we take as broad dependency on arrow versions as we can. -This means you can force kernel to rely on the specific arrow version that your engine already uses, -as long as it falls in that range. You can see the range in the `Cargo.toml` in the same folder as -this `README.md`. +We allow selecting the version of arrow to use via feature flags. 
Currently we support the following
+flags:
 
-For example, although arrow 53.1.0 has been released, you can force kernel to compile on 53.0 by
-putting the following in your project's `Cargo.toml`:
+- `arrow_53`: Use arrow version 53
+- `arrow_54`: Use arrow version 54
 
-```toml
-[patch.crates-io]
-arrow = "53.0"
-arrow-arith = "53.0"
-arrow-array = "53.0"
-arrow-buffer = "53.0"
-arrow-cast = "53.0"
-arrow-data = "53.0"
-arrow-ord = "53.0"
-arrow-json = "53.0"
-arrow-select = "53.0"
-arrow-schema = "53.0"
-parquet = "53.0"
-```
+Note that if more than one `arrow_x` feature is enabled, kernel will default to the _lowest_
+specified flag. This also means that if you use `--all-features` you will get the lowest version of
+arrow that kernel supports.
 
-Note that unfortunately patching in `cargo` requires that _exactly one_ version matches your
-specification. If only arrow "53.0.0" had been released the above will work, but if "53.0.1" where
-to be released, the specification will break and you will need to provide a more restrictive
-specification like `"=53.0.0"`.
+If no arrow feature is enabled, but at least one of `default-engine`, `sync-engine`,
+`arrow-conversion`, or `arrow-expression` is enabled, the lowest supported arrow version will be
+enabled.
 
 ### Object Store
 
 You may also need to patch the `object_store` version used if the version of `parquet` you depend on
diff --git a/acceptance/Cargo.toml b/acceptance/Cargo.toml
index 2854c7c39..e844007ef 100644
--- a/acceptance/Cargo.toml
+++ b/acceptance/Cargo.toml
@@ -14,19 +14,14 @@ rust-version.workspace = true
 release = false
 
 [dependencies]
-arrow-array = { workspace = true }
-arrow-cast = { workspace = true }
-arrow-ord = { workspace = true }
-arrow-select = { workspace = true }
-arrow-schema = { workspace = true }
 delta_kernel = { path = "../kernel", features = [
     "default-engine",
+    "arrow_53",
     "developer-visibility",
 ] }
 futures = "0.3"
 itertools = "0.13"
 object_store = { workspace = true }
-parquet = { workspace = true }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 thiserror = "1"
diff --git a/acceptance/src/data.rs b/acceptance/src/data.rs
index c515d50c9..9685f29c3 100644
--- a/acceptance/src/data.rs
+++ b/acceptance/src/data.rs
@@ -1,15 +1,18 @@
 use std::{path::Path, sync::Arc};
 
-use arrow_array::{Array, RecordBatch};
-use arrow_ord::sort::{lexsort_to_indices, SortColumn};
-use arrow_schema::{DataType, Schema};
-use arrow_select::{concat::concat_batches, filter::filter_record_batch, take::take};
+use delta_kernel::arrow::array::{Array, RecordBatch};
+use delta_kernel::arrow::compute::{
+    concat_batches, filter_record_batch, lexsort_to_indices, take, SortColumn,
+};
+use delta_kernel::arrow::datatypes::{DataType, Schema};
+use delta_kernel::parquet::arrow::async_reader::{
+    ParquetObjectReader, ParquetRecordBatchStreamBuilder,
+};
 use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Engine, Error, Table};
 use futures::{stream::TryStreamExt, StreamExt};
 use itertools::Itertools;
 use object_store::{local::LocalFileSystem, ObjectStore};
-use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder};
 
 use crate::{TestCaseInfo, TestResult};
 
@@ -83,8 +86,8 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) {
 fn normalize_col(col: Arc<dyn Array>) -> Arc<dyn Array> {
     if let DataType::Timestamp(unit, Some(zone)) = col.data_type() {
         if **zone == *"+00:00" {
-            arrow_cast::cast::cast(&col, &DataType::Timestamp(*unit, Some("UTC".into())))
-                .expect("Could not cast to UTC")
+            let data_type
= DataType::Timestamp(*unit, Some("UTC".into())); + delta_kernel::arrow::compute::cast(&col, &data_type).expect("Could not cast to UTC") } else { col } @@ -106,7 +109,10 @@ fn assert_columns_match(actual: &[Arc], expected: &[Arc]) } } -pub async fn assert_scan_data(engine: Arc, test_case: &TestCaseInfo) -> TestResult<()> { +pub async fn assert_scan_metadata( + engine: Arc, + test_case: &TestCaseInfo, +) -> TestResult<()> { let table_root = test_case.table_root()?; let table = Table::new(table_root); let snapshot = table.snapshot(engine.as_ref(), None)?; diff --git a/acceptance/src/meta.rs b/acceptance/src/meta.rs index fb8370f7e..e1c87088e 100644 --- a/acceptance/src/meta.rs +++ b/acceptance/src/meta.rs @@ -89,7 +89,7 @@ impl TestCaseInfo { let tvm = TableVersionMetaData { version: snapshot.version(), properties: metadata - .configuration + .configuration() .iter() .map(|(k, v)| (k.clone(), v.clone())) .collect(), diff --git a/acceptance/tests/dat_reader.rs b/acceptance/tests/dat_reader.rs index 622f038a9..6ba0e6d35 100644 --- a/acceptance/tests/dat_reader.rs +++ b/acceptance/tests/dat_reader.rs @@ -37,7 +37,7 @@ fn reader_test(path: &Path) -> datatest_stable::Result<()> { ); case.assert_metadata(engine.clone()).await.unwrap(); - acceptance::data::assert_scan_data(engine.clone(), &case) + acceptance::data::assert_scan_metadata(engine.clone(), &case) .await .unwrap(); }); diff --git a/acceptance/tests/other.rs b/acceptance/tests/other.rs index 5a89f23de..826cf580d 100644 --- a/acceptance/tests/other.rs +++ b/acceptance/tests/other.rs @@ -3,7 +3,7 @@ /// Since each new `.rs` file in this directory results in increased build and link time, it is /// important to only add new files if absolutely necessary for code readability or test /// performance. -use delta_kernel::snapshot::CheckpointMetadata; +use delta_kernel::snapshot::LastCheckpointHint; #[test] fn test_checkpoint_serde() { @@ -11,7 +11,7 @@ fn test_checkpoint_serde() { "./tests/dat/out/reader_tests/generated/with_checkpoint/delta/_delta_log/_last_checkpoint", ) .unwrap(); - let cp: CheckpointMetadata = serde_json::from_reader(file).unwrap(); + let cp: LastCheckpointHint = serde_json::from_reader(file).unwrap(); assert_eq!(cp.version, 2) } @@ -26,8 +26,8 @@ async fn test_read_last_checkpoint() { let store = Arc::new(LocalFileSystem::new()); let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new(store, prefix); - let cp = read_last_checkpoint(&client, &url).await.unwrap().unwrap(); + let storage = ObjectStoreStorageHandler::new(store, prefix); + let cp = read_last_checkpoint(&storage, &url).await.unwrap().unwrap(); assert_eq!(cp.version, 2); } diff --git a/feature-tests/Cargo.toml b/feature-tests/Cargo.toml index 7e45e41e2..43f3773a7 100644 --- a/feature-tests/Cargo.toml +++ b/feature-tests/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true release = false [dependencies] -delta_kernel = { path = "../kernel" } +delta_kernel = { path = "../kernel", features = ["arrow_53"] } [features] default-engine = [ "delta_kernel/default-engine" ] diff --git a/feature-tests/src/lib.rs b/feature-tests/src/lib.rs index a421d86f9..6a07429f1 100644 --- a/feature-tests/src/lib.rs +++ b/feature-tests/src/lib.rs @@ -1,7 +1,10 @@ /// This is a compilation test to ensure that the default-engine feature flags are working -/// correctly. Run (from workspace root) with: +/// correctly. +/// +/// Run (from workspace root) with: /// 1. `cargo b -p feature_tests --features default-engine-rustls` /// 2. 
`cargo b -p feature_tests --features default-engine` +/// /// These run in our build CI. pub fn test_default_engine_feature_flags() { #[cfg(any(feature = "default-engine", feature = "default-engine-rustls"))] diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index aa4edc167..7caca111b 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -24,23 +24,13 @@ url = "2" delta_kernel = { path = "../kernel", default-features = false, features = [ "developer-visibility", ] } -delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.1" } - -# used if we use the default engine to be able to move arrow data into the c-ffi format -arrow-schema = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-data = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-array = { version = ">=53, <55", default-features = false, optional = true } +delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.9.0" } [build-dependencies] -cbindgen = "0.27.0" +cbindgen = "0.28" libc = "0.2.158" [dev-dependencies] -delta_kernel = { path = "../kernel", features = ["default-engine", "sync-engine"] } object_store = { workspace = true } rand = "0.8.5" test_utils = { path = "../test-utils" } @@ -50,13 +40,8 @@ trybuild = "1.0" [features] default = ["default-engine"] cloud = ["delta_kernel/cloud"] -default-engine = [ - "delta_kernel/default-engine", - "arrow-array", - "arrow-data", - "arrow-schema", -] +default-engine = ["delta_kernel/default-engine", "delta_kernel/arrow"] tracing = [ "tracing-core", "tracing-subscriber" ] -sync-engine = ["delta_kernel/sync-engine"] +sync-engine = ["delta_kernel/sync-engine", "delta_kernel/arrow"] developer-visibility = [] test-ffi = [] diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 8fb3144f4..ca3f57251 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -25,4 +25,4 @@ parse_deps = true # only crates found in this list will ever be parsed. # # default: there is no allow-list (NOTE: this is the opposite of []) -include = ["delta_kernel", "arrow-data", "arrow-schema"] +include = ["arrow", "arrow-data", "arrow-schema", "delta_kernel"] diff --git a/ffi/examples/read-table/arrow.c b/ffi/examples/read-table/arrow.c index c6214df6b..1dabacde0 100644 --- a/ffi/examples/read-table/arrow.c +++ b/ffi/examples/read-table/arrow.c @@ -11,6 +11,7 @@ ArrowContext* init_arrow_context() context->num_batches = 0; context->batches = NULL; context->cur_filter = NULL; + context->cur_transform = NULL; return context; } @@ -50,86 +51,10 @@ static GArrowRecordBatch* get_record_batch(FFI_ArrowArray* array, GArrowSchema* return record_batch; } -// Add columns to a record batch for each partition. In a "real" engine we would want to parse the -// string values into the correct data type. 
This program just adds all partition columns as strings -// for simplicity -static GArrowRecordBatch* add_partition_columns( - GArrowRecordBatch* record_batch, - PartitionList* partition_cols, - const CStringMap* partition_values) -{ - gint64 rows = garrow_record_batch_get_n_rows(record_batch); - gint64 cols = garrow_record_batch_get_n_columns(record_batch); - GArrowRecordBatch* cur_record_batch = record_batch; - GError* error = NULL; - for (uintptr_t i = 0; i < partition_cols->len; i++) { - char* col = partition_cols->cols[i]; - guint pos = cols + i; - KernelStringSlice key = { col, strlen(col) }; - char* partition_val = get_from_string_map(partition_values, key, allocate_string); - print_diag( - " Adding partition column '%s' with value '%s' at column %u\n", - col, - partition_val ? partition_val : "NULL", - pos); - GArrowStringArrayBuilder* builder = garrow_string_array_builder_new(); - for (gint64 i = 0; i < rows; i++) { - if (partition_val) { - garrow_string_array_builder_append_string(builder, partition_val, &error); - } else { - garrow_array_builder_append_null((GArrowArrayBuilder*)builder, &error); - } - if (report_g_error("Can't append to partition column builder", error)) { - break; - } - } - - if (partition_val) { - free(partition_val); - } - - if (error != NULL) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - - GArrowArray* partition_col = garrow_array_builder_finish((GArrowArrayBuilder*)builder, &error); - if (report_g_error("Can't build string array for partition column", error)) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - g_object_unref(builder); - - GArrowDataType* string_data_type = (GArrowDataType*)garrow_string_data_type_new(); - GArrowField* field = garrow_field_new(col, string_data_type); - GArrowRecordBatch* old_batch = cur_record_batch; - cur_record_batch = garrow_record_batch_add_column(old_batch, pos, field, partition_col, &error); - g_object_unref(old_batch); - g_object_unref(partition_col); - g_object_unref(string_data_type); - g_object_unref(field); - if (cur_record_batch == NULL) { - if (error != NULL) { - printf("Could not add column at %u: %s\n", pos, error->message); - g_error_free(error); - } - } - } - return cur_record_batch; -} - // append a batch to our context static void add_batch_to_context( ArrowContext* context, - ArrowFFIData* arrow_data, - PartitionList* partition_cols, - const CStringMap* partition_values) + ArrowFFIData* arrow_data) { GArrowSchema* schema = get_schema(&arrow_data->schema); GArrowRecordBatch* record_batch = get_record_batch(&arrow_data->array, schema); @@ -142,11 +67,6 @@ static void add_batch_to_context( g_object_unref(context->cur_filter); context->cur_filter = NULL; } - record_batch = add_partition_columns(record_batch, partition_cols, partition_values); - if (record_batch == NULL) { - printf("Failed to add partition columns, not adding batch\n"); - return; - } context->batches = g_list_append(context->batches, record_batch); context->num_batches++; print_diag( @@ -187,20 +107,52 @@ static GArrowBooleanArray* slice_to_arrow_bool_array(const KernelBoolSlice slice return (GArrowBooleanArray*)ret; } +// This will apply the transform in the context to the specified data. 
This consumes the passed +// ExclusiveEngineData and return a new transformed one +static ExclusiveEngineData* apply_transform( + struct EngineContext* context, + ExclusiveEngineData* data) { + if (!context->arrow_context->cur_transform) { + print_diag(" No transform needed"); + return data; + } + print_diag(" Applying transform\n"); + SharedExpressionEvaluator* evaluator = new_expression_evaluator( + context->engine, + context->read_schema, // input schema + context->arrow_context->cur_transform, + context->logical_schema); // output schema + ExternResultHandleExclusiveEngineData transformed_res = evaluate( + context->engine, + &data, + evaluator); + free_engine_data(data); + free_expression_evaluator(evaluator); + if (transformed_res.tag != OkHandleExclusiveEngineData) { + print_error("Failed to transform read data.", (Error*)transformed_res.err); + free_error((Error*)transformed_res.err); + return NULL; + } + return transformed_res.ok; +} + // This is the callback that will be called for each chunk of data read from the parquet file static void visit_read_data(void* vcontext, ExclusiveEngineData* data) { print_diag(" Converting read data to arrow\n"); struct EngineContext* context = vcontext; - ExternResultArrowFFIData arrow_res = get_raw_arrow_data(data, context->engine); + ExclusiveEngineData* transformed = apply_transform(context, data); + if (!transformed) { + exit(-1); + } + ExternResultArrowFFIData arrow_res = get_raw_arrow_data(transformed, context->engine); if (arrow_res.tag != OkArrowFFIData) { print_error("Failed to get arrow data.", (Error*)arrow_res.err); free_error((Error*)arrow_res.err); exit(-1); } ArrowFFIData* arrow_data = arrow_res.ok; - add_batch_to_context( - context->arrow_context, arrow_data, context->partition_cols, context->partition_values); + add_batch_to_context(context->arrow_context, arrow_data); free(arrow_data); // just frees the struct, the data and schema are freed/owned by add_batch_to_context } @@ -208,7 +160,8 @@ static void visit_read_data(void* vcontext, ExclusiveEngineData* data) void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector) + const KernelBoolSlice selection_vector, + const Expression* transform) { int full_len = strlen(context->table_root) + path.len + 1; char* full_path = malloc(sizeof(char) * full_len); @@ -233,6 +186,7 @@ void c_read_parquet_file( } context->arrow_context->cur_filter = sel_array; } + context->arrow_context->cur_transform = transform; ExclusiveFileReadResultIterator* read_iter = read_res.ok; for (;;) { ExternResultbool ok_res = read_result_next(read_iter, context, visit_read_data); diff --git a/ffi/examples/read-table/arrow.h b/ffi/examples/read-table/arrow.h index 0236b238b..8f34cdd4f 100644 --- a/ffi/examples/read-table/arrow.h +++ b/ffi/examples/read-table/arrow.h @@ -15,13 +15,15 @@ typedef struct ArrowContext gsize num_batches; GList* batches; GArrowBooleanArray* cur_filter; + const Expression* cur_transform; } ArrowContext; ArrowContext* init_arrow_context(void); void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector); + const KernelBoolSlice selection_vector, + const Expression* transform); void print_arrow_context(ArrowContext* context); void free_arrow_context(ArrowContext* context); diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 704559a59..3b74355d0 100644 --- a/ffi/examples/read-table/read_table.c +++ 
b/ffi/examples/read-table/read_table.c @@ -50,6 +50,7 @@ void scan_row_callback( int64_t size, const Stats* stats, const DvInfo* dv_info, + const Expression* transform, const CStringMap* partition_values) { (void)size; // not using this at the moment @@ -76,28 +77,34 @@ void scan_row_callback( context->partition_values = partition_values; print_partition_info(context, partition_values); #ifdef PRINT_ARROW_DATA - c_read_parquet_file(context, path, selection_vector); + c_read_parquet_file(context, path, selection_vector, transform); #endif free_bool_slice(selection_vector); context->partition_values = NULL; } -// For each chunk of scan data (which may contain multiple files to scan), kernel will call this -// function (named do_visit_scan_data to avoid conflict with visit_scan_data exported by kernel) -void do_visit_scan_data( - void* engine_context, - ExclusiveEngineData* engine_data, - KernelBoolSlice selection_vec, - const CTransforms* transforms) -{ +// For each chunk of scan metadata (which may contain multiple files to scan), kernel will call this +// function (named do_visit_scan_metadata to avoid conflict with visit_scan_metadata exported by +// kernel) +void do_visit_scan_metadata(void* engine_context, HandleSharedScanMetadata scan_metadata) { print_diag("\nScan iterator found some data to read\n Of this data, here is " "a selection vector\n"); - print_selection_vector(" ", &selection_vec); + struct EngineContext* context = engine_context; + + ExternResultKernelBoolSlice selection_vector_res = + selection_vector_from_scan_metadata(scan_metadata, context->engine); + if (selection_vector_res.tag != OkKernelBoolSlice) { + printf("Could not get selection vector from kernel\n"); + exit(-1); + } + KernelBoolSlice selection_vector = selection_vector_res.ok; + print_selection_vector(" ", &selection_vector); + // Ask kernel to iterate each individual file and call us back with extracted metadata print_diag("Asking kernel to call us back for each scan row (file to read)\n"); - visit_scan_data(engine_data, selection_vec, transforms, engine_context, scan_row_callback); - free_bool_slice(selection_vec); - free_engine_data(engine_data); + visit_scan_metadata(scan_metadata, engine_context, scan_row_callback); + free_bool_slice(selection_vector); + free_scan_metadata(scan_metadata); } // Called for each element of the partition StringSliceIterator. We just turn the slice into a @@ -112,15 +119,15 @@ void visit_partition(void* context, const KernelStringSlice partition) } // Build a list of partition column names. 
-PartitionList* get_partition_list(SharedGlobalScanState* state) +PartitionList* get_partition_list(SharedSnapshot* snapshot) { print_diag("Building list of partition columns\n"); - uintptr_t count = get_partition_column_count(state); + uintptr_t count = get_partition_column_count(snapshot); PartitionList* list = malloc(sizeof(PartitionList)); // We set the `len` to 0 here and use it to track how many items we've added to the list list->len = 0; list->cols = malloc(sizeof(char*) * count); - StringSliceIterator* part_iter = get_partition_columns(state); + StringSliceIterator* part_iter = get_partition_columns(snapshot); for (;;) { bool has_next = string_slice_next(part_iter, list, visit_partition); if (!has_next) { @@ -263,6 +270,8 @@ int main(int argc, char* argv[]) char* table_root = snapshot_table_root(snapshot, allocate_string); print_diag("Table root: %s\n", table_root); + PartitionList* partition_cols = get_partition_list(snapshot); + print_diag("Starting table scan\n\n"); ExternResultHandleSharedScan scan_res = scan(snapshot, engine, NULL); @@ -273,10 +282,11 @@ int main(int argc, char* argv[]) SharedScan* scan = scan_res.ok; SharedGlobalScanState* global_state = get_global_scan_state(scan); + SharedSchema* logical_schema = get_global_logical_schema(global_state); SharedSchema* read_schema = get_global_read_schema(global_state); - PartitionList* partition_cols = get_partition_list(global_state); struct EngineContext context = { global_state, + logical_schema, read_schema, table_root, engine, @@ -287,26 +297,28 @@ int main(int argc, char* argv[]) #endif }; - ExternResultHandleSharedScanDataIterator data_iter_res = kernel_scan_data_init(engine, scan); - if (data_iter_res.tag != OkHandleSharedScanDataIterator) { - print_error("Failed to construct scan data iterator.", (Error*)data_iter_res.err); + ExternResultHandleSharedScanMetadataIterator data_iter_res = + scan_metadata_iter_init(engine, scan); + if (data_iter_res.tag != OkHandleSharedScanMetadataIterator) { + print_error("Failed to construct scan metadata iterator.", (Error*)data_iter_res.err); free_error((Error*)data_iter_res.err); return -1; } - SharedScanDataIterator* data_iter = data_iter_res.ok; + SharedScanMetadataIterator* data_iter = data_iter_res.ok; - print_diag("\nIterating scan data\n"); + print_diag("\nIterating scan metadata\n"); // iterate scan files for (;;) { - ExternResultbool ok_res = kernel_scan_data_next(data_iter, &context, do_visit_scan_data); + ExternResultbool ok_res = + scan_metadata_next(data_iter, &context, do_visit_scan_metadata); if (ok_res.tag != Okbool) { - print_error("Failed to iterate scan data.", (Error*)ok_res.err); + print_error("Failed to iterate scan metadata.", (Error*)ok_res.err); free_error((Error*)ok_res.err); return -1; } else if (!ok_res.ok) { - print_diag("Scan data iterator done\n"); + print_diag("Scan metadata iterator done\n"); break; } } @@ -319,9 +331,10 @@ int main(int argc, char* argv[]) context.arrow_context = NULL; #endif - free_kernel_scan_data(data_iter); + free_scan_metadata_iter(data_iter); free_scan(scan); - free_global_read_schema(read_schema); + free_schema(logical_schema); + free_schema(read_schema); free_global_scan_state(global_state); free_snapshot(snapshot); free_engine(engine); diff --git a/ffi/examples/read-table/read_table.h b/ffi/examples/read-table/read_table.h index 28d9c72dc..cf55863d9 100644 --- a/ffi/examples/read-table/read_table.h +++ b/ffi/examples/read-table/read_table.h @@ -14,6 +14,7 @@ typedef struct PartitionList struct EngineContext { 
SharedGlobalScanState* global_state; + SharedSchema* logical_schema; SharedSchema* read_schema; char* table_root; SharedExternEngine* engine; diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 8c29675a6..a70bd5f5a 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -273,7 +273,8 @@ void print_schema(SharedSnapshot* snapshot) .visit_timestamp = visit_timestamp, .visit_timestamp_ntz = visit_timestamp_ntz, }; - uintptr_t schema_list_id = visit_schema(snapshot, &visitor); + SharedSchema* schema = logical_schema(snapshot); + uintptr_t schema_list_id = visit_schema(schema, &visitor); #ifdef VERBOSE printf("Schema returned in list %" PRIxPTR "\n", schema_list_id); #endif @@ -281,5 +282,6 @@ void print_schema(SharedSnapshot* snapshot) printf("Schema:\n"); print_list(&builder, schema_list_id, 0, 0); printf("\n"); + free_schema(schema); free_builder(builder); } diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index eee88d1dc..f668860c5 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -87,7 +87,8 @@ struct BinaryData { uintptr_t len; }; struct Decimal { - uint64_t value[2]; + int64_t hi; + uint64_t lo; uint8_t precision; uint8_t scale; }; @@ -202,15 +203,15 @@ void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStri } void visit_expr_decimal_literal(void* data, uintptr_t sibling_list_id, - uint64_t value_ms, + int64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; struct Decimal* dec = &literal->value.decimal; - dec->value[0] = value_ms; - dec->value[1] = value_ls; + dec->hi = value_ms; + dec->lo = value_ls; dec->precision = precision; dec->scale = scale; put_expr_item(data, sibling_list_id, literal, Literal); diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index 7507c8de0..0b36c9de7 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -144,9 +144,9 @@ void print_tree_helper(ExpressionItem ref, int depth) { } case Decimal: { struct Decimal* dec = &lit->value.decimal; - printf("Decimal(%lld,%lld,%d,%d)\n", - (long long)dec->value[0], - (long long)dec->value[1], + printf("Decimal(%lld,%llu,%d,%d)\n", + (long long)dec->hi, + (unsigned long long)dec->lo, dec->precision, dec->scale); break; diff --git a/ffi/src/engine_data.rs b/ffi/src/engine_data.rs index 3363c9034..ad9b64644 100644 --- a/ffi/src/engine_data.rs +++ b/ffi/src/engine_data.rs @@ -1,9 +1,18 @@ //! 
EngineData related ffi code -use delta_kernel::{DeltaResult, EngineData}; +#[cfg(feature = "default-engine")] +use delta_kernel::arrow::array::{ + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, + ArrayData, StructArray, +}; +#[cfg(feature = "default-engine")] +use delta_kernel::DeltaResult; +use delta_kernel::EngineData; use std::ffi::c_void; -use crate::{ExclusiveEngineData, ExternResult, IntoExternResult, SharedExternEngine}; +use crate::ExclusiveEngineData; +#[cfg(feature = "default-engine")] +use crate::{ExternResult, IntoExternResult, SharedExternEngine}; use super::handle::Handle; @@ -45,8 +54,8 @@ unsafe fn get_raw_engine_data_impl(data: &mut Handle) -> &m #[cfg(feature = "default-engine")] #[repr(C)] pub struct ArrowFFIData { - pub array: arrow_data::ffi::FFI_ArrowArray, - pub schema: arrow_schema::ffi::FFI_ArrowSchema, + pub array: FFI_ArrowArray, + pub schema: FFI_ArrowSchema, } // TODO: This should use a callback to avoid having to have the engine free the struct @@ -71,16 +80,16 @@ pub unsafe extern "C" fn get_raw_arrow_data( // TODO: This method leaks the returned pointer memory. How will the engine free it? #[cfg(feature = "default-engine")] fn get_raw_arrow_data_impl(data: Box) -> DeltaResult<*mut ArrowFFIData> { - let record_batch: arrow_array::RecordBatch = data + let record_batch: delta_kernel::arrow::array::RecordBatch = data .into_any() .downcast::() .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))? .into(); - let sa: arrow_array::StructArray = record_batch.into(); - let array_data: arrow_data::ArrayData = sa.into(); + let sa: StructArray = record_batch.into(); + let array_data: ArrayData = sa.into(); // these call `clone`. is there a way to not copy anything and what exactly are they cloning? - let array = arrow_data::ffi::FFI_ArrowArray::new(&array_data); - let schema = arrow_schema::ffi::FFI_ArrowSchema::try_from(array_data.data_type())?; + let array = FFI_ArrowArray::new(&array_data); + let schema = FFI_ArrowSchema::try_from(array_data.data_type())?; let ret_data = Box::new(ArrowFFIData { array, schema }); Ok(Box::leak(ret_data)) } diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs index 1afb60510..03ae289ed 100644 --- a/ffi/src/engine_funcs.rs +++ b/ffi/src/engine_funcs.rs @@ -2,14 +2,17 @@ use std::sync::Arc; -use delta_kernel::{schema::Schema, DeltaResult, FileDataReadResultIterator}; +use delta_kernel::schema::{DataType, Schema, SchemaRef}; +use delta_kernel::{ + DeltaResult, EngineData, Expression, ExpressionEvaluator, FileDataReadResultIterator, +}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; use crate::{ - scan::SharedSchema, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, - KernelStringSlice, NullableCvoid, SharedExternEngine, TryFromStringSlice, + ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelStringSlice, + NullableCvoid, SharedExternEngine, SharedSchema, TryFromStringSlice, }; use super::handle::Handle; @@ -51,6 +54,8 @@ impl Drop for FileReadResultIterator { /// /// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by /// [`free_read_result_iter`]. The visitor function pointer must be non-null. +/// +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] pub unsafe extern "C" fn read_result_next( mut data: Handle, @@ -97,7 +102,7 @@ pub unsafe extern "C" fn free_read_result_iter(data: Handle, + engine: Handle, // TODO Does this cause a free? 
file: &FileMeta, physical_schema: Handle, ) -> ExternResult> { @@ -115,7 +120,7 @@ fn read_parquet_file_impl( physical_schema: Arc, ) -> DeltaResult> { let engine = extern_engine.engine(); - let parquet_handler = engine.get_parquet_handler(); + let parquet_handler = engine.parquet_handler(); let location = Url::parse(path?)?; let delta_fm = delta_kernel::FileMeta { location, @@ -130,3 +135,111 @@ fn read_parquet_file_impl( }); Ok(res.into()) } + +// Expression Eval + +#[handle_descriptor(target=dyn ExpressionEvaluator, mutable=false)] +pub struct SharedExpressionEvaluator; + +/// Creates a new expression evaluator as provided by the passed engines `EvaluationHandler`. +/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `Expression`, and `SharedSchema`s +#[no_mangle] +pub unsafe extern "C" fn new_expression_evaluator( + engine: Handle, + input_schema: Handle, + expression: &Expression, + // TODO: Make this a data_type, and give a way for c code to go between schema <-> datatype + output_type: Handle, +) -> Handle { + let engine = unsafe { engine.clone_as_arc() }; + let input_schema = unsafe { input_schema.clone_as_arc() }; + let output_type: DataType = output_type.as_ref().clone().into(); + new_expression_evaluator_impl(engine, input_schema, expression, output_type) +} + +fn new_expression_evaluator_impl( + extern_engine: Arc, + input_schema: SchemaRef, + expression: &Expression, + output_type: DataType, +) -> Handle { + let engine = extern_engine.engine(); + let evaluator = engine.evaluation_handler().new_expression_evaluator( + input_schema, + expression.clone(), + output_type, + ); + evaluator.into() +} + +/// Free an expression evaluator +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +#[no_mangle] +pub unsafe extern "C" fn free_expression_evaluator(evaluator: Handle) { + debug!("engine released evaluator"); + evaluator.drop_handle(); +} + +/// Use the passed `evaluator` to evaluate its expression against the passed `batch` data. 
+/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `ExclusiveEngineData`, and `Evaluator` +#[no_mangle] +pub unsafe extern "C" fn evaluate( + engine: Handle, + batch: &mut Handle, + evaluator: Handle, +) -> ExternResult> { + let engine = unsafe { engine.clone_as_arc() }; + let batch = unsafe { batch.as_mut() }; + let evaluator = unsafe { evaluator.clone_as_arc() }; + let res = evaluate_impl(batch, evaluator.as_ref()); + res.into_extern_result(&engine.as_ref()) +} + +fn evaluate_impl( + batch: &dyn EngineData, + evaluator: &dyn ExpressionEvaluator, +) -> DeltaResult> { + evaluator.evaluate(batch).map(Into::into) +} + +#[cfg(test)] +mod tests { + use super::{free_expression_evaluator, new_expression_evaluator}; + use crate::{free_engine, handle::Handle, tests::get_default_engine, SharedSchema}; + use delta_kernel::{ + schema::{DataType, StructField, StructType}, + Expression, + }; + use std::sync::Arc; + + #[test] + fn test_new_evaluator() { + let engine = get_default_engine(); + let in_schema = Arc::new(StructType::new(vec![StructField::new( + "a", + DataType::LONG, + true, + )])); + let expr = Expression::literal(1); + let output_type: Handle = in_schema.clone().into(); + let in_schema_handle: Handle = in_schema.into(); + unsafe { + let evaluator = new_expression_evaluator( + engine.shallow_copy(), + in_schema_handle.shallow_copy(), + &expr, + output_type.shallow_copy(), + ); + in_schema_handle.drop_handle(); + output_type.drop_handle(); + free_engine(engine); + free_expression_evaluator(evaluator); + } + } +} diff --git a/ffi/src/error.rs b/ffi/src/error.rs index a615d0330..fd5fb87e2 100644 --- a/ffi/src/error.rs +++ b/ffi/src/error.rs @@ -52,6 +52,7 @@ pub enum KernelError { ChangeDataFeedUnsupported, ChangeDataFeedIncompatibleSchema, InvalidCheckpoint, + LiteralExpressionTransformError, } impl From for KernelError { @@ -110,6 +111,9 @@ impl From for KernelError { KernelError::ChangeDataFeedIncompatibleSchema } Error::InvalidCheckpoint(_) => KernelError::InvalidCheckpoint, + Error::LiteralExpressionTransformError(_) => { + KernelError::LiteralExpressionTransformError + } } } } diff --git a/ffi/src/expressions/engine.rs b/ffi/src/expressions/engine.rs index 2e839c50f..9492feafd 100644 --- a/ffi/src/expressions/engine.rs +++ b/ffi/src/expressions/engine.rs @@ -28,12 +28,14 @@ impl KernelExpressionVisitorState { /// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, /// along with a visitor function that can be invoked to recursively visit the predicate. This -/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// engine state must be valid until the call to [`scan::scan`] returns. Inside that method, the /// kernel allocates visitor state, which becomes the second argument to the predicate visitor /// invocation along with the engine-provided predicate pointer. The visitor state is valid for the /// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and /// kernel each retain ownership of their respective objects, with no need to coordinate memory /// lifetimes with the other. 
+/// +/// [`scan::scan`]: crate::scan::scan #[repr(C)] pub struct EnginePredicate { pub predicate: *mut c_void, @@ -45,7 +47,7 @@ fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: impl Into Option { diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index a5116db47..c8ce1b2d4 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -53,8 +53,8 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi /// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed /// to visitor methods /// TODO: Visit type information in struct field and null. This will likely involve using the schema -/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant -/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is +/// the relevant issue: #[repr(C)] pub struct EngineExpressionVisitor { /// An opaque engine state pointer @@ -96,7 +96,7 @@ pub struct EngineExpressionVisitor { pub visit_literal_decimal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, - value_ms: u64, + value_ms: i64, value_ls: u64, precision: u8, scale: u8, @@ -189,6 +189,29 @@ pub struct EngineExpressionVisitor { pub unsafe extern "C" fn visit_expression( expression: &Handle, visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression.as_ref(), visitor) +} + +/// Visit the expression of the passed [`Expression`] pointer using the provided `visitor`. See the +/// documentation of [`EngineExpressionVisitor`] for a description of how this visitor works. +/// +/// This method returns the id that the engine generated for the top level expression +/// +/// # Safety +/// +/// The caller must pass a valid Expression pointer and expression visitor +#[no_mangle] +pub unsafe extern "C" fn visit_expression_ref( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression, visitor) +} + +fn visit_expression_internal( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, ) -> usize { macro_rules! call { ( $visitor:ident, $visitor_fn:ident $(, $extra_args:expr) *) => { @@ -295,14 +318,12 @@ pub unsafe extern "C" fn visit_expression( buf.len() ), Scalar::Decimal(value, precision, scale) => { - let ms: u64 = (value >> 64) as u64; - let ls: u64 = *value as u64; call!( visitor, visit_literal_decimal, sibling_list_id, - ms, - ls, + (value >> 64) as i64, + *value as u64, *precision, *scale ) @@ -367,6 +388,6 @@ pub unsafe extern "C" fn visit_expression( } } let top_level = call!(visitor, make_field_list, 1); - visit_expression_impl(visitor, expression.as_ref(), top_level); + visit_expression_impl(visitor, expression, top_level); top_level } diff --git a/ffi/src/handle.rs b/ffi/src/handle.rs index 30b695ecc..6a29cad52 100644 --- a/ffi/src/handle.rs +++ b/ffi/src/handle.rs @@ -88,14 +88,14 @@ mod private { /// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must /// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel /// API call at a time. If thread races are possible, the handle should be protected with a - /// mutex. 
Due to Rust [reference - /// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), - /// this requirement applies even for API calls that appear to be read-only (because Rust code - /// always receives the handle as mutable). + /// mutex. Due to Rust [reference rules], this requirement applies even for API calls that + /// appear to be read-only (because Rust code always receives the handle as mutable). /// /// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can /// freely access shared (non-mutable) handles. /// + /// [reference rules]: + /// https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references /// cbindgen:transparent-typedef #[repr(transparent)] pub struct Handle { diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index caf04ef2c..e24553158 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use tracing::debug; use url::Url; +use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, EngineData, Table}; use delta_kernel_ffi_macros::handle_descriptor; @@ -80,7 +81,7 @@ impl Iterator for EngineIterator { /// /// Whoever instantiates the struct must ensure it does not outlive the data it points to. The /// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is -/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning +/// to always use the `kernel_string_slice` macro to create string slices, and to avoid returning /// a string slice from a code block or function (since the move risks over-extending its lifetime): /// /// ```ignore @@ -330,7 +331,9 @@ pub unsafe extern "C" fn free_row_indexes(slice: KernelRowIndexArray) { /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] -#[handle_descriptor(target=dyn EngineData, mutable=true, sized=false)] +/// +/// [`get_raw_engine_data`]: crate::engine_data::get_raw_engine_data +#[handle_descriptor(target=dyn EngineData, mutable=true)] pub struct ExclusiveEngineData; /// Drop an `ExclusiveEngineData`. @@ -352,12 +355,14 @@ pub trait ExternEngine: Send + Sync { #[handle_descriptor(target=dyn ExternEngine, mutable=false)] pub struct SharedExternEngine; +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] struct ExternEngineVtable { // Actual engine instance to use engine: Arc, allocate_error: AllocateErrorFn, } +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl Drop for ExternEngineVtable { fn drop(&mut self) { debug!("dropping engine interface"); @@ -368,6 +373,7 @@ impl Drop for ExternEngineVtable { /// /// Kernel doesn't use any threading or concurrency. If engine chooses to do so, engine is /// responsible for handling any races that could result. +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Send for ExternEngineVtable {} /// # Safety @@ -379,8 +385,10 @@ unsafe impl Send for ExternEngineVtable {} /// Basically, by failing to implement these traits, we forbid the engine from being able to declare /// its thread-safety (because rust assumes it is not threadsafe). By implementing them, we leave it /// up to the engine to enforce thread safety if engine chooses to use threads at all. 
+#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Sync for ExternEngineVtable {} +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl ExternEngine for ExternEngineVtable { fn engine(&self) -> Arc { self.engine.clone() @@ -561,6 +569,9 @@ pub unsafe extern "C" fn free_engine(engine: Handle) { engine.drop_handle(); } +#[handle_descriptor(target=Schema, mutable=false, sized=true)] +pub struct SharedSchema; + #[handle_descriptor(target=Snapshot, mutable=false, sized=true)] pub struct SharedSnapshot; @@ -607,12 +618,32 @@ pub unsafe extern "C" fn version(snapshot: Handle) -> u64 { snapshot.version() } +/// Get the logical schema of the specified snapshot +/// +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn logical_schema(snapshot: Handle) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + snapshot.schema().into() +} + +/// Free a schema +/// +/// # Safety +/// Engine is responsible for providing a valid schema handle. +#[no_mangle] +pub unsafe extern "C" fn free_schema(schema: Handle) { + schema.drop_handle(); +} + /// Get the resolved root of the table. This should be used in any future calls that require /// constructing a path /// /// # Safety /// -/// Caller is responsible for passing a valid handle. +/// Caller is responsible for passing a valid snapshot handle. #[no_mangle] pub unsafe extern "C" fn snapshot_table_root( snapshot: Handle, @@ -623,6 +654,30 @@ pub unsafe extern "C" fn snapshot_table_root( allocate_fn(kernel_string_slice!(table_root)) } +/// Get a count of the number of partition columns for this snapshot +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle +#[no_mangle] +pub unsafe extern "C" fn get_partition_column_count(snapshot: Handle) -> usize { + let snapshot = unsafe { snapshot.as_ref() }; + snapshot.metadata().partition_columns().len() +} + +/// Get an iterator of the list of partition columns for this snapshot. +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn get_partition_columns( + snapshot: Handle, +) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + let iter: Box = + Box::new(snapshot.metadata().partition_columns().clone().into_iter()); + iter.into() +} + type StringIter = dyn Iterator + Send; #[handle_descriptor(target=StringIter, mutable=true, sized=false)] @@ -630,8 +685,11 @@ pub struct StringSliceIterator; /// # Safety /// -/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by -/// [kernel_scan_data_free]. The visitor function pointer must be non-null. +/// The iterator must be valid (returned by [`scan_metadata_iter_init`]) and not yet freed by +/// [`free_scan_metadata_iter`]. The visitor function pointer must be non-null. 
+/// +/// [`scan_metadata_iter_init`]: crate::scan::scan_metadata_iter_init +/// [`free_scan_metadata_iter`]: crate::scan::free_scan_metadata_iter #[no_mangle] pub unsafe extern "C" fn string_slice_next( data: Handle, @@ -718,8 +776,8 @@ impl Default for ReferenceSet { #[cfg(test)] mod tests { use delta_kernel::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; - use object_store::{memory::InMemory, path::Path}; - use test_utils::{actions_to_string, add_commit, TestAction}; + use object_store::memory::InMemory; + use test_utils::{actions_to_string, actions_to_string_partitioned, add_commit, TestAction}; use super::*; use crate::error::{EngineError, KernelError}; @@ -768,7 +826,7 @@ mod tests { } } - fn get_default_engine() -> Handle { + pub(crate) fn get_default_engine() -> Handle { let path = "memory:///doesntmatter/foo"; let path = kernel_string_slice!(path); let builder = unsafe { ok_or_panic(get_engine_builder(path, allocate_err)) }; @@ -792,11 +850,7 @@ mod tests { actions_to_string(vec![TestAction::Metadata]), ) .await?; - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let engine = engine_to_handle(Arc::new(engine), allocate_err); let path = "memory:///"; @@ -816,6 +870,42 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_snapshot_partition_cols() -> Result<(), Box> { + let storage = Arc::new(InMemory::new()); + add_commit( + storage.as_ref(), + 0, + actions_to_string_partitioned(vec![TestAction::Metadata]), + ) + .await?; + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); + let engine = engine_to_handle(Arc::new(engine), allocate_err); + let path = "memory:///"; + + let snapshot = + unsafe { ok_or_panic(snapshot(kernel_string_slice!(path), engine.shallow_copy())) }; + + let partition_count = unsafe { get_partition_column_count(snapshot.shallow_copy()) }; + assert_eq!(partition_count, 1, "Should have one partition"); + + let partition_iter = unsafe { get_partition_columns(snapshot.shallow_copy()) }; + + #[no_mangle] + extern "C" fn visit_partition(_context: NullableCvoid, slice: KernelStringSlice) { + let s = unsafe { String::try_from_slice(&slice) }.unwrap(); + assert_eq!(s.as_str(), "val", "Partition col should be 'val'"); + } + while unsafe { string_slice_next(partition_iter.shallow_copy(), None, visit_partition) } { + // validate happens inside visit_partition + } + + unsafe { free_string_slice_data(partition_iter) } + unsafe { free_snapshot(snapshot) } + unsafe { free_engine(engine) } + Ok(()) + } + #[test] #[cfg(feature = "sync-engine")] fn sync_engine() { diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 73f691010..1a797566b 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -3,11 +3,10 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; -use delta_kernel::scan::state::{visit_scan_files, DvInfo, GlobalScanState}; -use delta_kernel::scan::{Scan, ScanData}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::state::{DvInfo, GlobalScanState}; +use delta_kernel::scan::{Scan, ScanMetadata}; use delta_kernel::snapshot::Snapshot; -use delta_kernel::{DeltaResult, Error, ExpressionRef}; +use delta_kernel::{DeltaResult, Error, Expression, ExpressionRef}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; @@ -15,23 +14,57 @@ use url::Url; use crate::expressions::engine::{ 
unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, }; +use crate::expressions::SharedExpression; use crate::{ - kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, - IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, - SharedExternEngine, SharedSnapshot, StringIter, StringSliceIterator, TryFromStringSlice, + kernel_string_slice, AllocateStringFn, ExternEngine, ExternResult, IntoExternResult, + KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, + SharedSchema, SharedSnapshot, TryFromStringSlice, }; use super::handle::Handle; // TODO: Why do we even need to expose a scan, when the only thing an engine can do with it is -// handit back to the kernel by calling `kernel_scan_data_init`? There isn't even an FFI method to +// handit back to the kernel by calling `scan_metadata_iter_init`? There isn't even an FFI method to // drop it! #[handle_descriptor(target=Scan, mutable=false, sized=true)] pub struct SharedScan; +#[handle_descriptor(target=ScanMetadata, mutable=false, sized=true)] +pub struct SharedScanMetadata; + +/// Drop a `SharedScanMetadata`. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid scan data handle. +#[no_mangle] +pub unsafe extern "C" fn free_scan_metadata(scan_metadata: Handle) { + scan_metadata.drop_handle(); +} + +/// Get a selection vector out of a [`SharedScanMetadata`] struct +/// +/// # Safety +/// Engine is responsible for providing valid pointers for each argument +#[no_mangle] +pub unsafe extern "C" fn selection_vector_from_scan_metadata( + scan_metadata: Handle, + engine: Handle, +) -> ExternResult { + let scan_metadata = unsafe { scan_metadata.as_ref() }; + selection_vector_from_scan_metadata_impl(scan_metadata).into_extern_result(&engine.as_ref()) +} + +fn selection_vector_from_scan_metadata_impl( + scan_metadata: &ScanMetadata, +) -> DeltaResult { + Ok(scan_metadata.scan_files.selection_vector.clone().into()) +} + /// Drops a scan. +/// /// # Safety -/// Caller is responsible for passing a [valid][Handle#Validity] scan handle. +/// Caller is responsible for passing a valid scan handle. #[no_mangle] pub unsafe extern "C" fn free_scan(scan: Handle) { scan.drop_handle(); @@ -70,8 +103,6 @@ fn scan_impl( #[handle_descriptor(target=GlobalScanState, mutable=false, sized=true)] pub struct SharedGlobalScanState; -#[handle_descriptor(target=Schema, mutable=false, sized=true)] -pub struct SharedSchema; /// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`] /// for more information. @@ -99,36 +130,17 @@ pub unsafe extern "C" fn get_global_read_schema( state.physical_schema.clone().into() } -/// Free a global read schema -/// -/// # Safety -/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`] -#[no_mangle] -pub unsafe extern "C" fn free_global_read_schema(schema: Handle) { - schema.drop_handle(); -} - -/// Get a count of the number of partition columns for this scan -/// -/// # Safety -/// Caller is responsible for passing a valid global scan pointer. -#[no_mangle] -pub unsafe extern "C" fn get_partition_column_count(state: Handle) -> usize { - let state = unsafe { state.as_ref() }; - state.partition_columns.len() -} - -/// Get an iterator of the list of partition columns for this scan. 
+/// Get the kernel view of the physical read schema that an engine should read from parquet file in +/// a scan /// /// # Safety -/// Caller is responsible for passing a valid global scan pointer. +/// Engine is responsible for providing a valid GlobalScanState pointer #[no_mangle] -pub unsafe extern "C" fn get_partition_columns( +pub unsafe extern "C" fn get_global_logical_schema( state: Handle, -) -> Handle { +) -> Handle { let state = unsafe { state.as_ref() }; - let iter: Box = Box::new(state.partition_columns.clone().into_iter()); - iter.into() + state.logical_schema.clone().into() } /// # Safety @@ -145,11 +157,11 @@ pub unsafe extern "C" fn free_global_scan_state(state: Handle Allow the iterator to be accessed safely by multiple threads. // Box -> Wrap its unsized content this struct is fixed-size with thin pointers. - // Item = DeltaResult - data: Mutex> + Send>>, + // Item = DeltaResult + data: Mutex> + Send>>, // Also keep a reference to the external engine for its error allocator. The default Parquet and // Json handlers don't hold any reference to the tokio reactor they rely on, so the iterator @@ -157,85 +169,83 @@ pub struct KernelScanDataIterator { engine: Arc, } -#[handle_descriptor(target=KernelScanDataIterator, mutable=false, sized=true)] -pub struct SharedScanDataIterator; +#[handle_descriptor(target=ScanMetadataIterator, mutable=false, sized=true)] +pub struct SharedScanMetadataIterator; -impl Drop for KernelScanDataIterator { +impl Drop for ScanMetadataIterator { fn drop(&mut self) { - debug!("dropping KernelScanDataIterator"); + debug!("dropping ScanMetadataIterator"); } } /// Get an iterator over the data needed to perform a scan. This will return a -/// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual -/// data in the iterator. +/// [`ScanMetadataIterator`] which can be passed to [`scan_metadata_next`] to get the +/// actual data in the iterator. /// /// # Safety /// /// Engine is responsible for passing a valid [`SharedExternEngine`] and [`SharedScan`] #[no_mangle] -pub unsafe extern "C" fn kernel_scan_data_init( +pub unsafe extern "C" fn scan_metadata_iter_init( engine: Handle, scan: Handle, -) -> ExternResult> { +) -> ExternResult> { let engine = unsafe { engine.clone_as_arc() }; let scan = unsafe { scan.as_ref() }; - kernel_scan_data_init_impl(&engine, scan).into_extern_result(&engine.as_ref()) + scan_metadata_iter_init_impl(&engine, scan).into_extern_result(&engine.as_ref()) } -fn kernel_scan_data_init_impl( +fn scan_metadata_iter_init_impl( engine: &Arc, scan: &Scan, -) -> DeltaResult> { - let scan_data = scan.scan_data(engine.engine().as_ref())?; - let data = KernelScanDataIterator { - data: Mutex::new(Box::new(scan_data)), +) -> DeltaResult> { + let scan_metadata = scan.scan_metadata(engine.engine().as_ref())?; + let data = ScanMetadataIterator { + data: Mutex::new(Box::new(scan_metadata)), engine: engine.clone(), }; Ok(Arc::new(data).into()) } -/// Call the provided `engine_visitor` on the next scan data item. The visitor will be provided with -/// a selection vector and engine data. It is the responsibility of the _engine_ to free these when -/// it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively. +/// Call the provided `engine_visitor` on the next scan metadata item. The visitor will be provided with +/// a [`SharedScanMetadata`], which contains the actual scan files and the associated selection vector. 
It is the +/// responsibility of the _engine_ to free the associated resources after use by calling +/// [`free_engine_data`] and [`free_bool_slice`] respectively. /// /// # Safety /// -/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by -/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null. +/// The iterator must be valid (returned by [scan_metadata_iter_init]) and not yet freed by +/// [`free_scan_metadata_iter`]. The visitor function pointer must be non-null. +/// +/// [`free_bool_slice`]: crate::free_bool_slice +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] -pub unsafe extern "C" fn kernel_scan_data_next( - data: Handle, +pub unsafe extern "C" fn scan_metadata_next( + data: Handle, engine_context: NullableCvoid, engine_visitor: extern "C" fn( engine_context: NullableCvoid, - engine_data: Handle, - selection_vector: KernelBoolSlice, - transforms: &CTransforms, + scan_metadata: Handle, ), ) -> ExternResult { let data = unsafe { data.as_ref() }; - kernel_scan_data_next_impl(data, engine_context, engine_visitor) + scan_metadata_next_impl(data, engine_context, engine_visitor) .into_extern_result(&data.engine.as_ref()) } -fn kernel_scan_data_next_impl( - data: &KernelScanDataIterator, +fn scan_metadata_next_impl( + data: &ScanMetadataIterator, engine_context: NullableCvoid, engine_visitor: extern "C" fn( engine_context: NullableCvoid, - engine_data: Handle, - selection_vector: KernelBoolSlice, - transforms: &CTransforms, + scan_metadata: Handle, ), ) -> DeltaResult { let mut data = data .data .lock() .map_err(|_| Error::generic("poisoned mutex"))?; - if let Some((data, sel_vec, transforms)) = data.next().transpose()? { - let bool_slice = KernelBoolSlice::from(sel_vec); - let transform_map = CTransforms { transforms }; - (engine_visitor)(engine_context, data.into(), bool_slice, &transform_map); + if let Some(scan_metadata) = data.next().transpose()? { + (engine_visitor)(engine_context, Arc::new(scan_metadata).into()); Ok(true) } else { Ok(false) @@ -245,11 +255,11 @@ fn kernel_scan_data_next_impl( /// # Safety /// /// Caller is responsible for (at most once) passing a valid pointer returned by a call to -/// [`kernel_scan_data_init`]. +/// [`scan_metadata_iter_init`]. // we should probably be consistent with drop vs. free on engine side (probably the latter is more // intuitive to non-rust code) #[no_mangle] -pub unsafe extern "C" fn free_kernel_scan_data(data: Handle) { +pub unsafe extern "C" fn free_scan_metadata_iter(data: Handle) { data.drop_handle(); } @@ -263,12 +273,23 @@ pub struct Stats { pub num_records: u64, } +/// This callback will be invoked for each valid file that needs to be read for a scan. +/// +/// The arguments to the callback are: +/// * `context`: a `void*` context this can be anything that engine needs to pass through to each call +/// * `path`: a `KernelStringSlice` which is the path to the file +/// * `size`: an `i64` which is the size of the file +/// * `dv_info`: a [`DvInfo`] struct, which allows getting the selection vector for this file +/// * `transform`: An optional expression that, if not `NULL`, _must_ be applied to physical data to +/// convert it to the correct logical format. If this is `NULL`, no transform is needed. 
+/// * `partition_values`: [DEPRECATED] a `HashMap` which are partition values type CScanCallback = extern "C" fn( engine_context: NullableCvoid, path: KernelStringSlice, size: i64, stats: Option<&Stats>, dv_info: &DvInfo, + transform: Option<&Expression>, partition_map: &CStringMap, ); @@ -303,10 +324,40 @@ pub unsafe extern "C" fn get_from_string_map( .and_then(|v| allocate_fn(kernel_string_slice!(v))) } +/// Transformation expressions that need to be applied to each row `i` in ScanMetadata. You can use +/// [`get_transform_for_row`] to get the transform for a particular row. If that returns an +/// associated expression, it _must_ be applied to the data read from the file specified by the +/// row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If +/// `get_transform_for_row` returns `NULL` no expression need be applied and the data read from disk +/// is already in the correct logical state. +/// +/// NB: If you are using `visit_scan_metadata` you don't need to worry about dealing with probing +/// `CTransforms`. The callback will be invoked with the correct transform for you. pub struct CTransforms { transforms: Vec>, } +#[no_mangle] +/// Allow getting the transform for a particular row. If the requested row is outside the range of +/// the passed `CTransforms` returns `NULL`, otherwise returns the element at the index of the +/// specified row. See also [`CTransforms`] above. +/// +/// # Safety +/// +/// The engine is responsible for providing a valid [`CTransforms`] pointer, and for checking if the +/// return value is `NULL` or not. +pub unsafe extern "C" fn get_transform_for_row( + row: usize, + transforms: &CTransforms, +) -> Option> { + transforms + .transforms + .get(row) + .cloned() + .flatten() + .map(Into::into) +} + /// Get a selection vector out of a [`DvInfo`] struct /// /// # Safety @@ -369,9 +420,10 @@ fn rust_callback( size: i64, kernel_stats: Option, dv_info: DvInfo, - _transform: Option, + transform: Option, partition_values: HashMap, ) { + let transform = transform.map(|e| e.as_ref().clone()); let partition_map = CStringMap { values: partition_values, }; @@ -384,6 +436,7 @@ fn rust_callback( size, stats.as_ref(), &dv_info, + transform.as_ref(), &partition_map, ); } @@ -394,32 +447,25 @@ struct ContextWrapper { callback: CScanCallback, } -/// Shim for ffi to call visit_scan_data. This will generally be called when iterating through scan -/// data which provides the data handle and selection vector as each element in the iterator. +/// Shim for ffi to call visit_scan_metadata. This will generally be called when iterating through scan +/// data which provides the [`SharedScanMetadata`] as each element in the iterator. /// /// # Safety -/// engine is responsible for passing a valid [`ExclusiveEngineData`] and selection vector. +/// engine is responsible for passing a valid [`SharedScanMetadata`]. #[no_mangle] -pub unsafe extern "C" fn visit_scan_data( - data: Handle, - selection_vec: KernelBoolSlice, - transforms: &CTransforms, +pub unsafe extern "C" fn visit_scan_metadata( + scan_metadata: Handle, engine_context: NullableCvoid, callback: CScanCallback, ) { - let selection_vec = unsafe { selection_vec.as_ref() }; - let data = unsafe { data.as_ref() }; + let scan_metadata = unsafe { scan_metadata.as_ref() }; let context_wrapper = ContextWrapper { engine_context, callback, }; + // TODO: return ExternResult to caller instead of panicking? 
- visit_scan_files( - data, - selection_vec, - &transforms.transforms, - context_wrapper, - rust_callback, - ) - .unwrap(); + scan_metadata + .visit_scan_files(context_wrapper, rust_callback) + .unwrap(); } diff --git a/ffi/src/schema.rs b/ffi/src/schema.rs index f033ac8d9..a474c80c3 100644 --- a/ffi/src/schema.rs +++ b/ffi/src/schema.rs @@ -1,7 +1,8 @@ use std::os::raw::c_void; -use crate::scan::{CStringMap, SharedSchema}; -use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot}; +use crate::handle::Handle; +use crate::scan::CStringMap; +use crate::{kernel_string_slice, KernelStringSlice, SharedSchema}; use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own @@ -192,23 +193,6 @@ pub struct EngineSchemaVisitor { ), } -/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the -/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. -/// -/// This method returns the id of the list allocated to hold the top level schema columns. -/// -/// # Safety -/// -/// Caller is responsible for passing a valid snapshot handle and schema visitor. -#[no_mangle] -pub unsafe extern "C" fn visit_snapshot_schema( - snapshot: Handle, - visitor: &mut EngineSchemaVisitor, -) -> usize { - let snapshot = unsafe { snapshot.as_ref() }; - visit_schema_impl(snapshot.schema(), visitor) -} - /// Visit the given `schema` using the provided `visitor`. See the documentation of /// [`EngineSchemaVisitor`] for a description of how this visitor works. /// diff --git a/ffi/tests/invalid-handle-code/private-constructor.stderr b/ffi/tests/invalid-handle-code/private-constructor.stderr index b6d9c5e07..14f35a5bf 100644 --- a/ffi/tests/invalid-handle-code/private-constructor.stderr +++ b/ffi/tests/invalid-handle-code/private-constructor.stderr @@ -2,4 +2,4 @@ error[E0451]: field `ptr` of struct `Handle` is private --> tests/invalid-handle-code/private-constructor.rs:10:41 | 10 | let _: Handle = Handle { ptr: std::ptr::NonNull::dangling() }; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ private field + | ^^^ private field diff --git a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected index 4a062b104..324ef0086 100644 --- a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected +++ b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected @@ -6,6 +6,14 @@ Schema: โ”œโ”€ number: long โ””โ”€ a_float: double +letter: [ + "a", + "e", + "f", + "a", + "b", + "c" +] number: [ 4, 5, @@ -22,11 +30,3 @@ a_float: [ 2.2, 3.3 ] -letter: [ - "a", - "e", - "f", - "a", - "b", - "c" -] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index cc0a5abd1..02e924260 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -6,19 +6,4 @@ edition = "2021" [workspace] [dependencies] -arrow = "=53.0.0" -delta_kernel = { path = "../kernel", features = ["arrow-conversion", "arrow-expression", "default-engine", "sync-engine"] } - -[patch.'file:///../kernel'] -arrow = "=53.0.0" -arrow-arith = "=53.0.0" -arrow-array = "=53.0.0" -arrow-buffer = "=53.0.0" -arrow-cast = "=53.0.0" -arrow-data = "=53.0.0" -arrow-ord = "=53.0.0" -arrow-json = "=53.0.0" -arrow-select = "=53.0.0" -arrow-schema = "=53.0.0" -parquet = "=53.0.0" -object_store = "=0.11.1" +delta_kernel = { path = "../kernel", features = 
["default-engine", "sync-engine"] } diff --git a/integration-tests/src/main.rs b/integration-tests/src/main.rs index 55a809e8c..db26d0e4d 100644 --- a/integration-tests/src/main.rs +++ b/integration-tests/src/main.rs @@ -1,15 +1,16 @@ -fn create_arrow_schema() -> arrow::datatypes::Schema { - use arrow::datatypes::{DataType, Field, Schema}; +use delta_kernel::arrow::datatypes::{DataType, Field, Schema}; + +fn create_arrow_schema() -> Schema { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Boolean, false); Schema::new(vec![field_a, field_b]) } fn create_kernel_schema() -> delta_kernel::schema::Schema { - use delta_kernel::schema::{DataType, Schema, StructField}; + use delta_kernel::schema::{DataType, StructField}; let field_a = StructField::not_null("a", DataType::LONG); let field_b = StructField::not_null("b", DataType::BOOLEAN); - Schema::new(vec![field_a, field_b]) + delta_kernel::schema::Schema::new(vec![field_a, field_b]) } fn main() { diff --git a/integration-tests/test-all-arrow-versions.sh b/integration-tests/test-all-arrow-versions.sh index 35c8fdc7d..e4207a56e 100755 --- a/integration-tests/test-all-arrow-versions.sh +++ b/integration-tests/test-all-arrow-versions.sh @@ -2,38 +2,43 @@ set -eu -o pipefail -is_version_le() { - [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] +clean_up () { + CODE=$? + git checkout HEAD Cargo.toml + exit $CODE } -is_version_lt() { - if [ "$1" = "$2" ] - then - return 1 - else - is_version_le "$1" "$2" - fi -} +# ensure we checkout the clean version of Cargo.toml no matter how we exit +trap clean_up EXIT test_arrow_version() { ARROW_VERSION="$1" echo "== Testing version $ARROW_VERSION ==" - sed -i'' -e "s/\(arrow[^\"]*=[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml - sed -i'' -e "s/\(parquet[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml cargo clean rm -f Cargo.lock cargo update + echo "Cargo.toml is:" cat Cargo.toml - cargo run + echo "" + if [ "$ARROW_VERSION" = "ALL_ENABLED" ]; then + echo "testing with --all-features" + cargo run --all-features + else + echo "testing with --features ${ARROW_VERSION}" + cargo run --features ${ARROW_VERSION} + fi } -MIN_ARROW_VER="53.0.0" -MAX_ARROW_VER="54.0.0" +FEATURES=$(cat ../kernel/Cargo.toml | grep -e ^arrow_ | awk '{ print $1 }' | sort -u) -for ARROW_VERSION in $(curl -s https://crates.io/api/v1/crates/arrow | jq -r '.versions[].num' | tr -d '\r') + +echo "[features]" >> Cargo.toml + +for ARROW_VERSION in ${FEATURES} do - if ! 
is_version_lt "$ARROW_VERSION" "$MIN_ARROW_VER" && is_version_lt "$ARROW_VERSION" "$MAX_ARROW_VER" - then - test_arrow_version "$ARROW_VERSION" - fi + echo "${ARROW_VERSION} = [\"delta_kernel/${ARROW_VERSION}\"]" >> Cargo.toml + test_arrow_version $ARROW_VERSION done + +test_arrow_version "ALL_ENABLED" + diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 1431b1ff1..323aee250 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -30,13 +30,15 @@ pre-release-hook = [ "--unreleased", "--prepend", "../CHANGELOG.md", + "--include-path", + "*", "--tag", "{{version}}", ] [dependencies] bytes = "1.7" -chrono = { version = "0.4" } +chrono = "=0.4.39" fix-hidden-lifetime-bug = "0.2" indexmap = "2.5.0" itertools = "0.13" @@ -47,31 +49,33 @@ thiserror = "1" # only for structured logging tracing = { version = "0.1", features = ["log"] } url = "2" -uuid = "1.10.0" +uuid = { version = "1.10.0", features = ["v4", "fast-rng"] } z85 = "3.0.5" # bring in our derive macros -delta_kernel_derive = { path = "../derive-macros", version = "0.6.1" } +delta_kernel_derive = { path = "../derive-macros", version = "0.9.0" } # used for developer-visibility visibility = "0.1.1" # Used in the sync engine tempfile = { version = "3", optional = true } + +# Arrow supported versions +## 53 # Used in default engine -arrow-buffer = { workspace = true, optional = true } -arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] } -arrow-select = { workspace = true, optional = true } -arrow-arith = { workspace = true, optional = true } -arrow-cast = { workspace = true, optional = true } -arrow-json = { workspace = true, optional = true } -arrow-ord = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } +arrow_53 = { package = "arrow", version = "53", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +# Used in default and sync engine +parquet_53 = { package = "parquet", version = "53", features = ["async", "object_store"] , optional = true } +###### +## 54 +arrow_54 = { package = "arrow", version = "54", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +parquet_54 = { package = "parquet", version = "54", features = ["async", "object_store"] , optional = true } +###### + futures = { version = "0.3", optional = true } object_store = { workspace = true, optional = true } hdfs-native-object-store = { workspace = true, optional = true } -# Used in default and sync engine -parquet = { workspace = true, optional = true } # Used for fetching direct urls (like pre-signed urls) reqwest = { version = "0.12.8", default-features = false, optional = true } strum = { version = "0.26", features = ["derive"] } @@ -85,14 +89,17 @@ hdfs-native = { workspace = true, optional = true } walkdir = { workspace = true, optional = true } [features] -arrow-conversion = ["arrow-schema"] -arrow-expression = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", -] +# The default version to be expected +arrow = ["arrow_53"] + +arrow_53 = ["dep:arrow_53", "dep:parquet_53"] + +arrow_54 = ["dep:arrow_54", "dep:parquet_54"] + +need_arrow = [] +arrow-conversion = ["need_arrow"] +arrow-expression = ["need_arrow"] + cloud = [ "object_store/aws", "object_store/azure", @@ -107,19 +114,10 @@ default = [] default-engine-base = [ "arrow-conversion", "arrow-expression", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-json", - "arrow-schema", - "arrow-select", "futures", + "need_arrow", "object_store", - 
"parquet/async", - "parquet/object_store", "tokio", - "uuid/v4", - "uuid/fast-rng", ] # the default-engine use the reqwest crate with default features which uses native-tls. if you want @@ -134,13 +132,7 @@ default-engine-rustls = [ developer-visibility = [] sync-engine = [ - "arrow-cast", - "arrow-conversion", - "arrow-expression", - "arrow-array", - "arrow-json", - "arrow-select", - "parquet", + "need_arrow", "tempfile", ] integration-test = [ @@ -156,9 +148,9 @@ version = "=0.5.9" rustc_version = "0.4.1" [dev-dependencies] -arrow = { workspace = true, features = ["json", "prettyprint"] } -delta_kernel = { path = ".", features = ["default-engine", "sync-engine"] } +delta_kernel = { path = ".", features = ["arrow", "default-engine", "sync-engine"] } test_utils = { path = "../test-utils" } +async-trait = "0.1" # only used for our custom SlowGetStore ObjectStore implementation paste = "1.0" test-log = { version = "0.2", default-features = false, features = ["trace"] } tempfile = "3" diff --git a/kernel/examples/inspect-table/Cargo.toml b/kernel/examples/inspect-table/Cargo.toml index b81a8ac5b..4208c6938 100644 --- a/kernel/examples/inspect-table/Cargo.toml +++ b/kernel/examples/inspect-table/Cargo.toml @@ -5,11 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } +arrow = "53" clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "developer-visibility", ] } diff --git a/kernel/examples/inspect-table/src/main.rs b/kernel/examples/inspect-table/src/main.rs index f5145905e..fab6a412b 100644 --- a/kernel/examples/inspect-table/src/main.rs +++ b/kernel/examples/inspect-table/src/main.rs @@ -41,7 +41,7 @@ enum Commands { /// Show the table's schema Schema, /// Show the meta-data that would be used to scan the table - ScanData, + ScanMetadata, /// Show each action from the log-segments Actions { /// Show the log in reverse order (default is log replay order -- newest first) @@ -207,23 +207,17 @@ fn try_main() -> DeltaResult<()> { Commands::Schema => { println!("{:#?}", snapshot.schema()); } - Commands::ScanData => { + Commands::ScanMetadata => { let scan = ScanBuilder::new(snapshot).build()?; - let scan_data = scan.scan_data(&engine)?; - for res in scan_data { - let (data, vector, transforms) = res?; - delta_kernel::scan::state::visit_scan_files( - data.as_ref(), - &vector, - &transforms, - (), - print_scan_file, - )?; + let scan_metadata_iter = scan.scan_metadata(&engine)?; + for res in scan_metadata_iter { + let scan_metadata = res?; + scan_metadata.visit_scan_files((), print_scan_file)?; } } Commands::Actions { oldest_first } => { let log_schema = get_log_schema(); - let actions = snapshot.log_segment().replay( + let actions = snapshot.log_segment().read_actions( &engine, log_schema.clone(), log_schema.clone(), diff --git a/kernel/examples/read-table-changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml index 181da7dc6..35f077bc2 100644 --- a/kernel/examples/read-table-changes/Cargo.toml +++ b/kernel/examples/read-table-changes/Cargo.toml @@ -8,14 +8,12 @@ publish = false release = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow", "default-engine", ] } env_logger = "0.11.3" url = "2" itertools = "0.13" -arrow = { workspace = true, features = 
["prettyprint"] } diff --git a/kernel/examples/read-table-changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs index 3360a06cf..ddafc1554 100644 --- a/kernel/examples/read-table-changes/src/main.rs +++ b/kernel/examples/read-table-changes/src/main.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, sync::Arc}; -use arrow::{compute::filter_record_batch, util::pretty::print_batches}; -use arrow_array::RecordBatch; use clap::Parser; +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::arrow::{compute::filter_record_batch, util::pretty::print_batches}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; diff --git a/kernel/examples/read-table-multi-threaded/Cargo.toml b/kernel/examples/read-table-multi-threaded/Cargo.toml index 3362e579a..8cb7c9cd3 100644 --- a/kernel/examples/read-table-multi-threaded/Cargo.toml +++ b/kernel/examples/read-table-multi-threaded/Cargo.toml @@ -5,10 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "sync-engine", "developer-visibility", diff --git a/kernel/examples/read-table-multi-threaded/README.md b/kernel/examples/read-table-multi-threaded/README.md index 5c4cdebfb..8cb45ecdb 100644 --- a/kernel/examples/read-table-multi-threaded/README.md +++ b/kernel/examples/read-table-multi-threaded/README.md @@ -3,7 +3,7 @@ Read Table Multi-Threaded # About This example shows a program that reads a table using multiple threads. This shows the use of the -`scan_data`, `global_scan_state`, and `visit_scan_files` methods, that can be used to partition work +`scan_metadata`, `global_scan_state`, and `visit_scan_files` methods, that can be used to partition work to either multiple threads, or workers (in the case of a distributed engine). You can run this from the same directory as this `README.md` by running `cargo run -- [args]`. @@ -49,4 +49,4 @@ To select specific columns you need a `--` after the column list specification. - Read `letter` and `data` columns from the `multi_partitioned` dat table: -`cargo run -- --columns letter,data -- ../../../acceptance/tests/dat/out/reader_tests/generated/multi_partitioned/delta/` +`cargo run -- --columns letter,data -- ../../../acceptance/tests/dat/out/reader_tests/generated/multi_partitioned/delta/` \ No newline at end of file diff --git a/kernel/examples/read-table-multi-threaded/src/main.rs b/kernel/examples/read-table-multi-threaded/src/main.rs index 9e2cee88c..e75eeeb4e 100644 --- a/kernel/examples/read-table-multi-threaded/src/main.rs +++ b/kernel/examples/read-table-multi-threaded/src/main.rs @@ -20,7 +20,7 @@ use clap::{Parser, ValueEnum}; use url::Url; /// An example program that reads a table using multiple threads. This shows the use of the -/// scan_data and global_scan_state methods on a Scan, that can be used to partition work to either +/// scan_metadata and global_scan_state methods on a Scan, that can be used to partition work to either /// multiple threads, or workers (in the case of a distributed engine). #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -179,7 +179,7 @@ fn try_main() -> DeltaResult<()> { // [`delta_kernel::scan::scan_row_schema`]. 
Generally engines will not need to interact with // this data directly, and can just call [`visit_scan_files`] to get pre-parsed data back from // the kernel. - let scan_data = scan.scan_data(engine.as_ref())?; + let scan_metadata = scan.scan_metadata(engine.as_ref())?; // get any global state associated with this scan let global_state = Arc::new(scan.global_scan_state()); @@ -209,15 +209,9 @@ fn try_main() -> DeltaResult<()> { // done sending drop(record_batch_tx); - for res in scan_data { - let (data, vector, transforms) = res?; - scan_file_tx = delta_kernel::scan::state::visit_scan_files( - data.as_ref(), - &vector, - &transforms, - scan_file_tx, - send_scan_file, - )?; + for res in scan_metadata { + let scan_metadata = res?; + scan_file_tx = scan_metadata.visit_scan_files(scan_file_tx, send_scan_file)?; } // have sent all scan files, drop this so threads will exit when there's no more work @@ -286,7 +280,7 @@ fn do_work( // enough meta-data was passed to each thread to correctly apply the selection // vector let read_results = engine - .get_parquet_handler() + .parquet_handler() .read_parquet_files(&[meta], scan_state.physical_schema.clone(), None) .unwrap(); diff --git a/kernel/examples/read-table-single-threaded/Cargo.toml b/kernel/examples/read-table-single-threaded/Cargo.toml index dc0458139..e71959e7b 100644 --- a/kernel/examples/read-table-single-threaded/Cargo.toml +++ b/kernel/examples/read-table-single-threaded/Cargo.toml @@ -5,9 +5,10 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ + "arrow_53", "cloud", "default-engine", "sync-engine", diff --git a/kernel/src/actions/deletion_vector.rs b/kernel/src/actions/deletion_vector.rs index 953b73d24..8b0c20d59 100644 --- a/kernel/src/actions/deletion_vector.rs +++ b/kernel/src/actions/deletion_vector.rs @@ -10,7 +10,7 @@ use url::Url; use delta_kernel_derive::Schema; use crate::utils::require; -use crate::{DeltaResult, Error, FileSystemClient}; +use crate::{DeltaResult, Error, StorageHandler}; #[derive(Debug, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(serde::Serialize), serde(rename_all = "camelCase"))] @@ -66,7 +66,7 @@ impl DeletionVectorDescriptor { let path_len = self.path_or_inline_dv.len(); require!( path_len >= 20, - Error::deletion_vector("Invalid length {path_len}, must be >= 20") + Error::DeletionVector(format!("Invalid length {path_len}, must be >= 20")) ); let prefix_len = path_len - 20; let decoded = z85::decode(&self.path_or_inline_dv[prefix_len..]) @@ -104,7 +104,7 @@ impl DeletionVectorDescriptor { // are present, we assert they are the same pub fn read( &self, - fs_client: Arc, + storage: Arc, parent: &Url, ) -> DeltaResult { match self.absolute_path(parent)? { @@ -125,7 +125,7 @@ impl DeletionVectorDescriptor { let offset = self.offset; let size_in_bytes = self.size_in_bytes; - let dv_data = fs_client + let dv_data = storage .read_files(vec![(path, None)])? .next() .ok_or(Error::missing_data("No deletion vector data"))??; @@ -178,10 +178,10 @@ impl DeletionVectorDescriptor { /// represents a row index that is deleted from the table. 
pub fn row_indexes( &self, - fs_client: Arc, + storage: Arc, parent: &Url, ) -> DeltaResult> { - Ok(self.read(fs_client, parent)?.into_iter().collect()) + Ok(self.read(storage, parent)?.into_iter().collect()) } } @@ -363,9 +363,9 @@ mod tests { fn test_inline_read() { let inline = dv_inline(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let parent = Url::parse("http://not.used").unwrap(); - let tree_map = inline.read(fs_client, &parent).unwrap(); + let tree_map = inline.read(storage, &parent).unwrap(); assert_eq!(tree_map.len(), 6); for i in [3, 4, 7, 11, 18, 29] { assert!(tree_map.contains(i)); @@ -381,10 +381,10 @@ mod tests { std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap(); let parent = url::Url::from_directory_path(path).unwrap(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let example = dv_example(); - let tree_map = example.read(fs_client, &parent).unwrap(); + let tree_map = example.read(storage, &parent).unwrap(); let expected: Vec = vec![0, 9]; let found = tree_map.iter().collect::>(); @@ -441,9 +441,9 @@ mod tests { fn test_dv_row_indexes() { let example = dv_inline(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let parent = Url::parse("http://not.used").unwrap(); - let row_idx = example.row_indexes(fs_client, &parent).unwrap(); + let row_idx = example.row_indexes(storage, &parent).unwrap(); assert_eq!(row_idx.len(), 6); assert_eq!(&row_idx, &[3, 4, 7, 11, 18, 29]); diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 8bcb5df50..42e009ac5 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -1,8 +1,7 @@ //! Provides parsing and manipulation of the various actions defined in the [Delta //! 
specification](https://github.com/delta-io/delta/blob/master/PROTOCOL.md) -use std::any::type_name; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fmt::{Debug, Display}; use std::hash::Hash; use std::str::FromStr; @@ -12,14 +11,16 @@ use self::deletion_vector::DeletionVectorDescriptor; use crate::actions::schemas::GetStructField; use crate::schema::{SchemaRef, StructType}; use crate::table_features::{ - ReaderFeatures, WriterFeatures, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, + ReaderFeature, WriterFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, }; use crate::table_properties::TableProperties; use crate::utils::require; -use crate::{DeltaResult, EngineData, Error, RowVisitor as _}; +use crate::{DeltaResult, EngineData, Error, FileMeta, RowVisitor as _}; +use url::Url; use visitors::{MetadataVisitor, ProtocolVisitor}; use delta_kernel_derive::Schema; +use itertools::Itertools; use serde::{Deserialize, Serialize}; pub mod deletion_vector; @@ -47,6 +48,8 @@ pub(crate) const COMMIT_INFO_NAME: &str = "commitInfo"; pub(crate) const CDC_NAME: &str = "cdc"; #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) const SIDECAR_NAME: &str = "sidecar"; +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) const CHECKPOINT_METADATA_NAME: &str = "checkpointMetadata"; static LOG_ADD_SCHEMA: LazyLock = LazyLock::new(|| StructType::new([Option::::get_struct_field(ADD_NAME)]).into()); @@ -61,6 +64,7 @@ static LOG_SCHEMA: LazyLock = LazyLock::new(|| { Option::::get_struct_field(COMMIT_INFO_NAME), Option::::get_struct_field(CDC_NAME), Option::::get_struct_field(SIDECAR_NAME), + Option::::get_struct_field(CHECKPOINT_METADATA_NAME), // We don't support the following actions yet //Option::::get_struct_field(DOMAIN_METADATA_NAME), ]) @@ -88,12 +92,13 @@ pub(crate) fn get_log_commit_info_schema() -> &'static SchemaRef { } #[derive(Debug, Clone, PartialEq, Eq, Schema)] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(test, derive(Serialize), serde(rename_all = "camelCase"))] -pub struct Format { +pub(crate) struct Format { /// Name of the encoding for files in this table - pub provider: String, + pub(crate) provider: String, /// A map containing configuration options for the format - pub options: HashMap, + pub(crate) options: HashMap, } impl Default for Format { @@ -107,49 +112,63 @@ impl Default for Format { #[derive(Debug, Default, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(Serialize), serde(rename_all = "camelCase"))] -pub struct Metadata { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct Metadata { /// Unique identifier for this table - pub id: String, + pub(crate) id: String, /// User-provided identifier for this table - pub name: Option, + pub(crate) name: Option, /// User-provided description for this table - pub description: Option, + pub(crate) description: Option, /// Specification of the encoding for the files stored in the table - pub format: Format, + pub(crate) format: Format, /// Schema of the table - pub schema_string: String, + pub(crate) schema_string: String, /// Column names by which the data should be partitioned - pub partition_columns: Vec, + pub(crate) partition_columns: Vec, /// The time when this metadata action is created, in milliseconds since the Unix epoch - pub created_time: Option, + pub(crate) created_time: Option, /// Configuration options for the metadata action. 
These are parsed into [`TableProperties`]. - pub configuration: HashMap, + pub(crate) configuration: HashMap, } impl Metadata { - pub fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { + pub(crate) fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { let mut visitor = MetadataVisitor::default(); visitor.visit_rows_of(data)?; Ok(visitor.metadata) } - pub fn parse_schema(&self) -> DeltaResult { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn configuration(&self) -> &HashMap { + &self.configuration + } + + pub(crate) fn parse_schema(&self) -> DeltaResult { Ok(serde_json::from_str(&self.schema_string)?) } + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn partition_columns(&self) -> &Vec { + &self.partition_columns + } + /// Parse the metadata configuration HashMap into a TableProperties struct. /// Note that parsing is infallible -- any items that fail to parse are simply propagated /// through to the `TableProperties.unknown_properties` field. - pub fn parse_table_properties(&self) -> TableProperties { + pub(crate) fn parse_table_properties(&self) -> TableProperties { TableProperties::from(self.configuration.iter()) } } #[derive(Default, Debug, Clone, PartialEq, Eq, Schema, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] // TODO move to another module so that we disallow constructing this struct without using the // try_new function. -pub struct Protocol { +pub(crate) struct Protocol { /// The minimum version of the Delta read protocol that a client must implement /// in order to correctly read this table min_reader_version: i32, @@ -159,21 +178,36 @@ pub struct Protocol { /// A collection of features that a client must implement in order to correctly /// read this table (exist only when minReaderVersion is set to 3) #[serde(skip_serializing_if = "Option::is_none")] - reader_features: Option>, + reader_features: Option>, /// A collection of features that a client must implement in order to correctly /// write this table (exist only when minWriterVersion is set to 7) #[serde(skip_serializing_if = "Option::is_none")] - writer_features: Option>, + writer_features: Option>, +} + +fn parse_features(features: Option>) -> Option> +where + T: FromStr, + T::Err: Debug, +{ + features + .map(|fs| { + fs.into_iter() + .map(|f| T::from_str(&f.to_string())) + .collect() + }) + .transpose() + .expect("Parsing FromStr should never fail with strum 'default'") } impl Protocol { /// Try to create a new Protocol instance from reader/writer versions and table features. This /// can fail if the protocol is invalid. 
- pub fn try_new( + pub(crate) fn try_new( min_reader_version: i32, min_writer_version: i32, - reader_features: Option>>, - writer_features: Option>>, + reader_features: Option>, + writer_features: Option>, ) -> DeltaResult { if min_reader_version == 3 { require!( @@ -191,8 +225,10 @@ impl Protocol { ) ); } - let reader_features = reader_features.map(|f| f.into_iter().map(Into::into).collect()); - let writer_features = writer_features.map(|f| f.into_iter().map(Into::into).collect()); + + let reader_features = parse_features(reader_features); + let writer_features = parse_features(writer_features); + Ok(Protocol { min_reader_version, min_writer_version, @@ -203,48 +239,50 @@ impl Protocol { /// Create a new Protocol by visiting the EngineData and extracting the first protocol row into /// a Protocol instance. If no protocol row is found, returns Ok(None). - pub fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { + pub(crate) fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { let mut visitor = ProtocolVisitor::default(); visitor.visit_rows_of(data)?; Ok(visitor.protocol) } /// This protocol's minimum reader version - pub fn min_reader_version(&self) -> i32 { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn min_reader_version(&self) -> i32 { self.min_reader_version } /// This protocol's minimum writer version - pub fn min_writer_version(&self) -> i32 { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn min_writer_version(&self) -> i32 { self.min_writer_version } /// Get the reader features for the protocol - pub fn reader_features(&self) -> Option<&[String]> { + pub(crate) fn reader_features(&self) -> Option<&[ReaderFeature]> { self.reader_features.as_deref() } /// Get the writer features for the protocol - pub fn writer_features(&self) -> Option<&[String]> { + pub(crate) fn writer_features(&self) -> Option<&[WriterFeature]> { self.writer_features.as_deref() } /// True if this protocol has the requested reader feature - pub fn has_reader_feature(&self, feature: &ReaderFeatures) -> bool { + pub(crate) fn has_reader_feature(&self, feature: &ReaderFeature) -> bool { self.reader_features() - .is_some_and(|features| features.iter().any(|f| f == feature.as_ref())) + .is_some_and(|features| features.contains(feature)) } /// True if this protocol has the requested writer feature - pub fn has_writer_feature(&self, feature: &WriterFeatures) -> bool { + pub(crate) fn has_writer_feature(&self, feature: &WriterFeature) -> bool { self.writer_features() - .is_some_and(|features| features.iter().any(|f| f == feature.as_ref())) + .is_some_and(|features| features.contains(feature)) } /// Check if reading a table with this protocol is supported. That is: does the kernel support /// the specified protocol reader version and all enabled reader features? If yes, returns unit /// type, otherwise will return an error. - pub fn ensure_read_supported(&self) -> DeltaResult<()> { + pub(crate) fn ensure_read_supported(&self) -> DeltaResult<()> { match &self.reader_features { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.min_reader_version == 3 => { @@ -274,57 +312,67 @@ impl Protocol { /// Check if writing to a table with this protocol is supported. That is: does the kernel /// support the specified protocol writer version and all enabled writer features? 
- pub fn ensure_write_supported(&self) -> DeltaResult<()> { + pub(crate) fn ensure_write_supported(&self) -> DeltaResult<()> { match &self.writer_features { - // if min_reader_version = 3 and min_writer_version = 7 and all writer features are - // supported => OK - Some(writer_features) - if self.min_reader_version == 3 && self.min_writer_version == 7 => - { + Some(writer_features) if self.min_writer_version == 7 => { + // if we're on version 7, make sure we support all the specified features ensure_supported_features(writer_features, &SUPPORTED_WRITER_FEATURES) } - // otherwise not supported - _ => Err(Error::unsupported( - "Only tables with min reader version 3 and min writer version 7 with no table features are supported." - )), + Some(_) => { + // there are features, but we're not on 7, so the protocol is actually broken + Err(Error::unsupported( + "Tables with min writer version != 7 should not have table features.", + )) + } + None => { + // no features, we currently only support version 1 or 2 in this case + require!( + self.min_writer_version == 1 || self.min_writer_version == 2, + Error::unsupported( + "Currently delta-kernel-rs can only write to tables with protocol.minWriterVersion = 1, 2, or 7" + ) + ); + Ok(()) + } } } } -// given unparsed `table_features`, parse and check if they are subset of `supported_features` +// given `table_features`, check if they are subset of `supported_features` pub(crate) fn ensure_supported_features( - table_features: &[String], - supported_features: &HashSet, + table_features: &[T], + supported_features: &[T], ) -> DeltaResult<()> where + T: Display + FromStr + Hash + Eq, ::Err: Display, - T: Debug + FromStr + Hash + Eq, { - let error = |unsupported, unsupported_or_unknown| { - let supported = supported_features.iter().collect::>(); - let features_type = type_name::() - .rsplit("::") - .next() - .unwrap_or("table features"); - Error::Unsupported(format!( - "{} {} {:?}. Supported {} are {:?}", - unsupported_or_unknown, features_type, unsupported, features_type, supported - )) - }; - let parsed_features: HashSet = table_features + // first check if all features are supported, else we proceed to craft an error message + if table_features .iter() - .map(|s| T::from_str(s).map_err(|_| error(vec![s.to_string()], "Unknown"))) - .collect::>()?; - parsed_features - .is_subset(supported_features) - .then_some(()) - .ok_or_else(|| { - let unsupported = parsed_features - .difference(supported_features) - .map(|f| format!("{:?}", f)) - .collect::>(); - error(unsupported, "Unsupported") - }) + .all(|feature| supported_features.contains(feature)) + { + return Ok(()); + } + + // we get the type name (ReaderFeature/WriterFeature) for better error messages + let features_type = std::any::type_name::() + .rsplit("::") + .next() + .unwrap_or("table feature"); + + // NB: we didn't do this above to avoid allocation in the common case + let mut unsupported = table_features + .iter() + .filter(|feature| !supported_features.contains(*feature)); + + Err(Error::Unsupported(format!( + "Unknown {}s: \"{}\". 
Supported {}s: \"{}\"", + features_type, + unsupported.join("\", \""), + features_type, + supported_features.iter().join("\", \""), + ))) } #[derive(Debug, Clone, PartialEq, Eq, Schema)] @@ -357,30 +405,31 @@ pub(crate) struct CommitInfo { #[derive(Debug, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(Serialize, Default), serde(rename_all = "camelCase"))] -pub struct Add { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct Add { /// A relative path to a data file from the root of the table or an absolute path to a file /// that should be added to the table. The path is a URI as specified by /// [RFC 2396 URI Generic Syntax], which needs to be decoded to get the data file path. /// /// [RFC 2396 URI Generic Syntax]: https://www.ietf.org/rfc/rfc2396.txt - pub path: String, + pub(crate) path: String, /// A map from partition column to value for this logical file. This map can contain null in the /// values meaning a partition is null. We drop those values from this map, due to the /// `drop_null_container_values` annotation. This means an engine can assume that if a partition /// is found in [`Metadata`] `partition_columns`, but not in this map, its value is null. #[drop_null_container_values] - pub partition_values: HashMap, + pub(crate) partition_values: HashMap, /// The size of this data file in bytes - pub size: i64, + pub(crate) size: i64, /// The time this logical file was created, as milliseconds since the epoch. - pub modification_time: i64, + pub(crate) modification_time: i64, /// When `false` the logical file must already be present in the table or the records /// in the added file must be contained in one or more remove actions in the same version. - pub data_change: bool, + pub(crate) data_change: bool, /// Contains [statistics] (e.g., count, min/max values for columns) about the data in this logical file encoded as a JSON string. /// @@ -412,7 +461,9 @@ pub struct Add { } impl Add { - pub fn dv_unique_id(&self) -> Option { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn dv_unique_id(&self) -> Option { self.deletion_vector.as_ref().map(|dv| dv.unique_id()) } } @@ -500,27 +551,27 @@ pub(crate) struct Cdc { } #[derive(Debug, Clone, PartialEq, Eq, Schema)] -pub struct SetTransaction { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct SetTransaction { /// A unique identifier for the application performing the transaction. - pub app_id: String, + pub(crate) app_id: String, /// An application-specific numeric identifier for this transaction. - pub version: i64, + pub(crate) version: i64, /// The time when this transaction action was created in milliseconds since the Unix epoch. - pub last_updated: Option, + pub(crate) last_updated: Option, } /// The sidecar action references a sidecar file which provides some of the checkpoint's /// file actions. This action is only allowed in checkpoints following the V2 spec. /// /// [More info]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#sidecar-file-information -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Schema, Debug, PartialEq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct Sidecar { /// A path to a sidecar file that can be either: - /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. 
+ /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. /// - An absolute path /// The path is a URI as specified by [RFC 2396 URI Generic Syntax], which needs to be decoded /// to get the file path. @@ -538,6 +589,43 @@ pub(crate) struct Sidecar { pub tags: Option>, } +impl Sidecar { + /// Convert a Sidecar record to a FileMeta. + /// + /// This helper first builds the URL by joining the provided log_root with + /// the "_sidecars/" folder and the given sidecar path. + pub(crate) fn to_filemeta(&self, log_root: &Url) -> DeltaResult { + Ok(FileMeta { + location: log_root.join("_sidecars/")?.join(&self.path)?, + last_modified: self.modification_time, + size: self.size_in_bytes.try_into().map_err(|_| { + Error::generic(format!( + "Failed to convert sidecar size {} to usize", + self.size_in_bytes + )) + })?, + }) + } +} + +/// The CheckpointMetadata action describes details about a checkpoint following the V2 specification. +/// +/// [More info]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoint-metadata +#[derive(Debug, Clone, PartialEq, Eq, Schema)] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct CheckpointMetadata { + /// The version of the V2 spec checkpoint. + /// + /// Currently using `i64` for compatibility with other actions' representations. + /// Future work will address converting numeric fields to unsigned types (e.g., `u64`) where + /// semantically appropriate (e.g., for version, size, timestamps, etc.). + /// See issue #786 for tracking progress. + pub(crate) version: i64, + + /// Map containing any additional metadata about the V2 spec checkpoint. + pub(crate) tags: Option>, +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -698,6 +786,21 @@ mod tests { assert_eq!(schema, expected); } + #[test] + fn test_checkpoint_metadata_schema() { + let schema = get_log_schema() + .project(&[CHECKPOINT_METADATA_NAME]) + .expect("Couldn't get checkpointMetadata field"); + let expected = Arc::new(StructType::new([StructField::nullable( + "checkpointMetadata", + StructType::new([ + StructField::not_null("version", DataType::LONG), + tags_field(), + ]), + )])); + assert_eq!(schema, expected); + } + #[test] fn test_transaction_schema() { let schema = get_log_schema() @@ -783,21 +886,21 @@ mod tests { } #[test] - fn test_v2_checkpoint_unsupported() { + fn test_v2_checkpoint_supported() { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 4, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_err()); @@ -817,17 +920,17 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), Some(&empty_features), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 3, 7, Some(&empty_features), - Some([WriterFeatures::V2Checkpoint]), + Some([WriterFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -835,11 +938,11 @@ mod tests { let protocol = 
Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([WriterFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([WriterFeature::V2Checkpoint]), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol { min_reader_version: 1, @@ -860,19 +963,24 @@ mod tests { #[test] fn test_ensure_write_supported() { - let protocol = Protocol { - min_reader_version: 3, - min_writer_version: 7, - reader_features: Some(vec![]), - writer_features: Some(vec![]), - }; + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some(vec![ + WriterFeature::AppendOnly, + WriterFeature::DeletionVectors, + WriterFeature::Invariants, + ]), + ) + .unwrap(); assert!(protocol.ensure_write_supported().is_ok()); let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::RowTracking]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); @@ -880,26 +988,34 @@ mod tests { #[test] fn test_ensure_supported_features() { - let supported_features = [ - ReaderFeatures::ColumnMapping, - ReaderFeatures::DeletionVectors, - ] - .into_iter() - .collect(); - let table_features = vec![ReaderFeatures::ColumnMapping.to_string()]; + let supported_features = [ReaderFeature::ColumnMapping, ReaderFeature::DeletionVectors]; + let table_features = vec![ReaderFeature::ColumnMapping]; ensure_supported_features(&table_features, &supported_features).unwrap(); // test unknown features - let table_features = vec![ReaderFeatures::ColumnMapping.to_string(), "idk".to_string()]; + let table_features = vec![ReaderFeature::ColumnMapping, ReaderFeature::unknown("idk")]; let error = ensure_supported_features(&table_features, &supported_features).unwrap_err(); match error { Error::Unsupported(e) if e == - "Unknown ReaderFeatures [\"idk\"]. Supported ReaderFeatures are [ColumnMapping, DeletionVectors]" + "Unknown ReaderFeatures: \"idk\". Supported ReaderFeatures: \"columnMapping\", \"deletionVectors\"" => {}, - Error::Unsupported(e) if e == - "Unknown ReaderFeatures [\"idk\"]. 
Supported ReaderFeatures are [DeletionVectors, ColumnMapping]" - => {}, - _ => panic!("Expected unsupported error"), + _ => panic!("Expected unsupported error, got: {error}"), } } + + #[test] + fn test_parse_table_feature_never_fails() { + // parse a non-str + let features = Some([5]); + let expected = Some(vec![ReaderFeature::unknown("5")]); + assert_eq!(parse_features::(features), expected); + + // weird strs + let features = Some(["", "absurD_)(+13%^โš™๏ธ"]); + let expected = Some(vec![ + ReaderFeature::unknown(""), + ReaderFeature::unknown("absurD_)(+13%^โš™๏ธ"), + ]); + assert_eq!(parse_features::(features), expected); + } } diff --git a/kernel/src/actions/set_transaction.rs b/kernel/src/actions/set_transaction.rs index ea1ffa6a7..a0a0517fb 100644 --- a/kernel/src/actions/set_transaction.rs +++ b/kernel/src/actions/set_transaction.rs @@ -7,13 +7,16 @@ use crate::{ DeltaResult, Engine, EngineData, Expression as Expr, ExpressionRef, RowVisitor as _, SchemaRef, }; -pub use crate::actions::visitors::SetTransactionMap; -pub struct SetTransactionScanner { +pub(crate) use crate::actions::visitors::SetTransactionMap; + +#[allow(dead_code)] +pub(crate) struct SetTransactionScanner { snapshot: Arc, } +#[allow(dead_code)] impl SetTransactionScanner { - pub fn new(snapshot: Arc) -> Self { + pub(crate) fn new(snapshot: Arc) -> Self { SetTransactionScanner { snapshot } } @@ -59,13 +62,16 @@ impl SetTransactionScanner { Expr::column([SET_TRANSACTION_NAME, "appId"]).is_not_null(), )) }); - self.snapshot - .log_segment() - .replay(engine, schema.clone(), schema, META_PREDICATE.clone()) + self.snapshot.log_segment().read_actions( + engine, + schema.clone(), + schema, + META_PREDICATE.clone(), + ) } /// Scan the Delta Log for the latest transaction entry of an application - pub fn application_transaction( + pub(crate) fn application_transaction( &self, engine: &dyn Engine, application_id: &str, @@ -75,7 +81,10 @@ impl SetTransactionScanner { } /// Scan the Delta Log to obtain the latest transaction for all applications - pub fn application_transactions(&self, engine: &dyn Engine) -> DeltaResult { + pub(crate) fn application_transactions( + &self, + engine: &dyn Engine, + ) -> DeltaResult { self.scan_application_transactions(engine, None) } } diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 9f34bd2c5..152a91e7d 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -352,7 +352,7 @@ impl RowVisitor for CdcVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of an Add action + // Since path column is required, use it to detect presence of a Cdc action if let Some(path) = getters[0].get_opt(i, "cdc.path")? 
{ self.cdcs.push(Self::visit_cdc(i, path, getters)?); } @@ -361,7 +361,7 @@ impl RowVisitor for CdcVisitor { } } -pub type SetTransactionMap = HashMap; +pub(crate) type SetTransactionMap = HashMap; /// Extract application transaction actions from the log into a map /// @@ -438,7 +438,6 @@ impl RowVisitor for SetTransactionVisitor { } } -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Default)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct SidecarVisitor { @@ -475,7 +474,7 @@ impl RowVisitor for SidecarVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of a sidecar action + // Since path column is required, use it to detect presence of a Sidecar action if let Some(path) = getters[0].get_opt(i, "sidecar.path")? { self.sidecars.push(Self::visit_sidecar(i, path, getters)?); } @@ -512,45 +511,12 @@ pub(crate) fn visit_deletion_vector_at<'a>( #[cfg(test)] mod tests { - use std::sync::Arc; + use super::*; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::StringArray; - use super::*; - use crate::{ - actions::get_log_schema, - engine::arrow_data::ArrowEngineData, - engine::sync::{json::SyncJsonHandler, SyncEngine}, - Engine, EngineData, JsonHandler, - }; - - // TODO(nick): Merge all copies of this into one "test utils" thing - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) - } - - fn action_batch() -> Box { - let handler = SyncJsonHandler {}; - let json_strings: StringArray = vec![ - r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, - r#"{"commitInfo":{"timestamp":1677811178585,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"635"},"engineInfo":"Databricks-Runtime/","txnId":"a6a94671-55ef-450e-9546-b8465b9147de"}}"#, - r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, - r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none", "delta.enableChangeDataFeed":"true"},"createdTime":1677811175819}}"#, - r#"{"cdc":{"path":"_change_data/age=21/cdc-00000-93f7fceb-281a-446a-b221-07b88132d203.c000.snappy.parquet","partitionValues":{"age":"21"},"size":1033,"dataChange":false}}"#, - 
r#"{"sidecar":{"path":"016ae953-37a9-438e-8683-9a9a4a79a395.parquet","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{"tag_foo":"tag_bar"}}}"#, - ] - .into(); - let output_schema = get_log_schema().clone(); - let parsed = handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); - ArrowEngineData::try_from_engine_data(parsed).unwrap() - } + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::utils::test_utils::{action_batch, parse_json_batch}; #[test] fn test_parse_protocol() -> DeltaResult<()> { @@ -559,8 +525,8 @@ mod tests { let expected = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec!["deletionVectors".into()]), - writer_features: Some(vec!["deletionVectors".into()]), + reader_features: Some(vec![ReaderFeature::DeletionVectors]), + writer_features: Some(vec![WriterFeature::DeletionVectors]), }; assert_eq!(parsed, expected); Ok(()) @@ -640,8 +606,6 @@ mod tests { #[test] fn test_parse_add_partitioned() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"commitInfo":{"timestamp":1670892998177,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"c1\",\"c2\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#, r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, @@ -651,10 +615,7 @@ mod tests { r#"{"add":{"path":"c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet","partitionValues":{"c1":"6","c2":"a"},"size":452,"modificationTime":1670892998137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":4},\"maxValues\":{\"c3\":4},\"nullCount\":{\"c3\":0}}"}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut add_visitor = AddVisitor::default(); add_visitor.visit_rows_of(batch.as_ref()).unwrap(); let add1 = Add { @@ -698,18 +659,13 @@ mod tests { #[test] fn test_parse_remove_partitioned() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"#, r#"{"remove":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut remove_visitor = RemoveVisitor::default(); remove_visitor.visit_rows_of(batch.as_ref()).unwrap(); let expected_remove = Remove { @@ -737,8 +693,6 @@ 
mod tests { #[test] fn test_parse_txn() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"commitInfo":{"timestamp":1670892998177,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"c1\",\"c2\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#, r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, @@ -748,10 +702,7 @@ mod tests { r#"{"txn":{"appId":"myApp2","version": 4, "lastUpdated": 1670892998177}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut txn_visitor = SetTransactionVisitor::default(); txn_visitor.visit_rows_of(batch.as_ref()).unwrap(); let mut actual = txn_visitor.set_transactions; diff --git a/kernel/src/arrow.rs b/kernel/src/arrow.rs new file mode 100644 index 000000000..915e603af --- /dev/null +++ b/kernel/src/arrow.rs @@ -0,0 +1,17 @@ +//! This module exists to help re-export the version of arrow used by default-engine and other +//! parts of kernel that need arrow + +#[cfg(feature = "arrow_53")] +pub use arrow_53::*; + +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] +pub use arrow_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/kernel/src/checkpoint/log_replay.rs b/kernel/src/checkpoint/log_replay.rs new file mode 100644 index 000000000..7867e042e --- /dev/null +++ b/kernel/src/checkpoint/log_replay.rs @@ -0,0 +1,588 @@ +//! The [`CheckpointLogReplayProcessor`] implements specialized log replay logic for creating +//! checkpoint files. It processes log files in reverse chronological order (newest to oldest) +//! and selects the set of actions to include in a checkpoint for a specific version. +//! +//! ## Actions Included for Checkpointing +//! +//! For checkpoint creation, this processor applies several filtering and deduplication +//! steps to each batch of log actions: +//! +//! 1. **Protocol and Metadata**: Retains exactly one of each - keeping only the latest protocol +//! and metadata actions. +//! 2. **Txn Actions**: Keeps exactly one `txn` action for each unique app ID, always selecting +//! the latest one encountered. +//! 3. **File Actions**: Resolves file actions to produce the latest state of the table, keeping +//! the most recent valid add actions and unexpired remove actions (tombstones) that are newer +//! than `minimum_file_retention_timestamp`. +//! +//! ## Architecture +//! +//! - [`CheckpointVisitor`]: Implements [`RowVisitor`] to examine each action in a batch and +//! determine if it should be included in the checkpoint. It maintains state for deduplication +//! across multiple actions in a batch and efficiently handles all filtering rules. +//! +//! - [`CheckpointLogReplayProcessor`]: Implements the [`LogReplayProcessor`] trait and orchestrates +//! the overall process. For each batch of log actions, it: +//! 1. 
Creates a visitor with the current deduplication state +//! 2. Applies the visitor to filter actions in the batch +//! 3. Updates counters and state for cross-batch deduplication +//! 4. Produces a [`CheckpointData`] result which includes a selection vector indicating which +//! actions should be included in the checkpoint file +use std::collections::HashSet; +use std::sync::LazyLock; + +use crate::engine_data::{GetData, RowVisitor, TypedGetData as _}; +use crate::log_replay::{FileActionDeduplicator, FileActionKey}; +use crate::schema::{column_name, ColumnName, ColumnNamesAndTypes, DataType}; +use crate::utils::require; +use crate::{DeltaResult, Error}; + +/// A visitor that filters actions for inclusion in a V1 spec checkpoint file. +/// +/// This visitor processes actions in newest-to-oldest order (as they appear in log +/// replay) and applies deduplication logic for both file and non-file actions to +/// produce the actions to include in a checkpoint. +/// +/// # File Action Filtering Rules: +/// Kept Actions: +/// - The first (newest) add action for each unique (path, dvId) pair +/// - The first (newest) remove action for each unique (path, dvId) pair, but only if +/// its deletionTimestamp > minimumFileRetentionTimestamp +/// Omitted Actions: +/// - Any file action (add/remove) with the same (path, dvId) as a previously processed action +/// - All remove actions with deletionTimestamp โ‰ค minimumFileRetentionTimestamp +/// - All remove actions with missing deletionTimestamp (defaults to 0) +/// +/// The resulting filtered file actions represents files present in the table (add actions) and +/// unexpired tombstones required for vacuum operations (remove actions). +/// +/// # Non-File Action Filtering: +/// - Keeps only the first protocol action (newest version) +/// - Keeps only the first metadata action (most recent table metadata) +/// - Keeps only the first txn action for each unique app ID +/// +/// # Excluded Actions +/// - CommitInfo, CDC, and CheckpointMetadata actions should not appear in the action +/// batches processed by this visitor, as they are excluded by the schema used to +/// read the log files upstream. If present, they will be ignored by the visitor. +/// - Sidecar actions should also be excludedโ€”when encountered in the log, the +/// corresponding sidecar files are read to extract the referenced file actions, +/// which are then included directly in the action stream instead of the sidecar actions themselves. +/// - The CheckpointMetadata action is included down the wire when writing a V2 spec checkpoint. +/// +/// # Memory Usage +/// This struct has O(N + M) memory usage where: +/// - N = number of txn actions with unique appIds +/// - M = number of file actions with unique (path, dvId) pairs +/// +/// The resulting filtered set of actions are the actions which should be written to a +/// checkpoint for a corresponding version. +pub(crate) struct CheckpointVisitor<'seen> { + // Deduplicates file actions (applies logic to filter Adds with corresponding Removes, + // and keep unexpired Removes). This deduplicator builds a set of seen file actions. + // This set has O(M) memory usage where M = number of file actions with unique (path, dvId) pairs + deduplicator: FileActionDeduplicator<'seen>, + // Tracks which rows to include in the final output + selection_vector: Vec, + // TODO: _last_checkpoint schema should be updated to use u64 instead of i64 + // for fields that are not expected to be negative. 
(Issue #786) + // i64 to match the `_last_checkpoint` file schema + non_file_actions_count: i64, + // i64 to match the `_last_checkpoint` file schema + file_actions_count: i64, + // i64 to match the `_last_checkpoint` file schema + add_actions_count: i64, + // i64 for comparison with remove.deletionTimestamp + minimum_file_retention_timestamp: i64, + // Flag to track if we've seen a protocol action so we can keep only the first protocol action + seen_protocol: bool, + // Flag to track if we've seen a metadata action so we can keep only the first metadata action + seen_metadata: bool, + // Set of transaction IDs to deduplicate by appId + // This set has O(N) memory usage where N = number of txn actions with unique appIds + seen_txns: &'seen mut HashSet, +} + +#[allow(unused)] +impl CheckpointVisitor<'_> { + // These index positions correspond to the order of columns defined in + // `selected_column_names_and_types()` + const ADD_PATH_INDEX: usize = 0; // Position of "add.path" in getters + const ADD_DV_START_INDEX: usize = 1; // Start position of add deletion vector columns + const REMOVE_PATH_INDEX: usize = 4; // Position of "remove.path" in getters + const REMOVE_DELETION_TIMESTAMP_INDEX: usize = 5; // Position of "remove.deletionTimestamp" in getters + const REMOVE_DV_START_INDEX: usize = 6; // Start position of remove deletion vector columns + + // These are the column names used to access the data in the getters + const REMOVE_DELETION_TIMESTAMP: &'static str = "remove.deletionTimestamp"; + const PROTOCOL_MIN_READER_VERSION: &'static str = "protocol.minReaderVersion"; + const METADATA_ID: &'static str = "metaData.id"; + + pub(crate) fn new<'seen>( + seen_file_keys: &'seen mut HashSet, + is_log_batch: bool, + selection_vector: Vec, + minimum_file_retention_timestamp: i64, + seen_protocol: bool, + seen_metadata: bool, + seen_txns: &'seen mut HashSet, + ) -> CheckpointVisitor<'seen> { + CheckpointVisitor { + deduplicator: FileActionDeduplicator::new( + seen_file_keys, + is_log_batch, + Self::ADD_PATH_INDEX, + Self::REMOVE_PATH_INDEX, + Self::ADD_DV_START_INDEX, + Self::REMOVE_DV_START_INDEX, + ), + selection_vector, + file_actions_count: 0, + add_actions_count: 0, + minimum_file_retention_timestamp, + seen_protocol, + seen_metadata, + seen_txns, + non_file_actions_count: 0, + } + } + + /// Determines if a remove action tombstone has expired and should be excluded from the checkpoint. + /// + /// A remove action includes a deletion_timestamp indicating when the deletion occurred. Physical + /// files are deleted lazily after a user-defined expiration time. Remove actions are kept to allow + /// concurrent readers to read snapshots at older versions. + /// + /// Tombstone expiration rules: + /// - If deletion_timestamp <= minimum_file_retention_timestamp: Expired (exclude) + /// - If deletion_timestamp > minimum_file_retention_timestamp: Valid (include) + /// - If deletion_timestamp is missing: Defaults to 0, treated as expired (exclude) + fn is_expired_tombstone<'a>(&self, i: usize, getter: &'a dyn GetData<'a>) -> DeltaResult { + // Ideally this should never be zero, but we are following the same behavior as Delta + // Spark and the Java Kernel. + // Note: When remove.deletion_timestamp is not present (defaulting to 0), the remove action + // will be excluded from the checkpoint file as it will be treated as expired. 
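+ // For example, with minimum_file_retention_timestamp = 100: a remove action whose
+ // deletionTimestamp is 100 is treated as expired and excluded, one with deletionTimestamp
+ // 101 is kept, and one with no deletionTimestamp defaults to 0 and is excluded.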
+ let deletion_timestamp = getter.get_opt(i, "remove.deletionTimestamp")?; + let deletion_timestamp = deletion_timestamp.unwrap_or(0i64); + + Ok(deletion_timestamp <= self.minimum_file_retention_timestamp) + } + + /// Processes a potential file action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid file action to be included in the checkpoint. + /// Returns Ok(false) if the row doesn't contain a file action or should be skipped. + /// Returns Err(...) if there was an error processing the action. + /// + /// Note: This function handles both add and remove actions, applying deduplication logic and + /// tombstone expiration rules as needed. + fn check_file_action<'a>( + &mut self, + i: usize, + getters: &[&'a dyn GetData<'a>], + ) -> DeltaResult { + // Extract the file action and handle errors immediately + let (file_key, is_add) = match self.deduplicator.extract_file_action(i, getters, false)? { + Some(action) => action, + None => return Ok(false), // If no file action is found, skip this row + }; + + // Check if we've already seen this file action + if self.deduplicator.check_and_record_seen(file_key) { + return Ok(false); // Skip file actions that we've processed before + } + + // Check for valid, non-duplicate adds and non-expired removes + if is_add { + self.add_actions_count += 1; + } else if self.is_expired_tombstone(i, getters[Self::REMOVE_DELETION_TIMESTAMP_INDEX])? { + return Ok(false); // Skip expired remove tombstones + } + self.file_actions_count += 1; + Ok(true) // Include this action + } + + /// Processes a potential protocol action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid protocol action. + /// Returns Ok(false) if the row doesn't contain a protocol action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_protocol_action<'a>( + &mut self, + i: usize, + getter: &'a dyn GetData<'a>, + ) -> DeltaResult { + // Skip protocol actions if we've already seen a newer one + if self.seen_protocol { + return Ok(false); + } + + // minReaderVersion is a required field, so we check for its presence to determine if this is a protocol action. + if getter + .get_int(i, Self::PROTOCOL_MIN_READER_VERSION)? + .is_none() + { + return Ok(false); // Not a protocol action + } + // Valid, non-duplicate protocol action to be included + self.seen_protocol = true; + self.non_file_actions_count += 1; + Ok(true) + } + + /// Processes a potential metadata action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid metadata action. + /// Returns Ok(false) if the row doesn't contain a metadata action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_metadata_action<'a>( + &mut self, + i: usize, + getter: &'a dyn GetData<'a>, + ) -> DeltaResult { + // Skip metadata actions if we've already seen a newer one + if self.seen_metadata { + return Ok(false); + } + + // id is a required field, so we check for its presence to determine if this is a metadata action. + if getter.get_str(i, Self::METADATA_ID)?.is_none() { + return Ok(false); // Not a metadata action + } + + // Valid, non-duplicate metadata action to be included + self.seen_metadata = true; + self.non_file_actions_count += 1; + Ok(true) + } + + /// Processes a potential txn action to determine if it should be included in the checkpoint. 
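+ /// Txn actions are deduplicated by app ID: only the first (newest) txn action seen for a
+ /// given `appId` is kept, and any later txn action for the same `appId` is skipped.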
+ /// + /// Returns Ok(true) if the row contains a valid txn action. + /// Returns Ok(false) if the row doesn't contain a txn action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_txn_action<'a>(&mut self, i: usize, getter: &'a dyn GetData<'a>) -> DeltaResult { + // Check for txn field + let Some(app_id) = getter.get_str(i, "txn.appId")? else { + return Ok(false); // Not a txn action + }; + + // If the app ID already exists in the set, the insertion will return false, + // indicating that this is a duplicate. + if !self.seen_txns.insert(app_id.to_string()) { + return Ok(false); + } + + // Valid, non-duplicate txn action to be included + self.non_file_actions_count += 1; + Ok(true) + } + + /// Determines if a row in the batch should be included in the checkpoint. + /// + /// This method checks each action type in sequence, short-circuiting as soon as a valid action is found. + /// Actions are checked in order of expected frequency of occurrence to optimize performance: + /// 1. File actions (most frequent) + /// 2. Txn actions + /// 3. Protocol & Metadata actions (least frequent) + /// + /// Returns Ok(true) if the row should be included in the checkpoint. + /// Returns Ok(false) if the row should be skipped. + /// Returns Err(...) if any validation or extraction failed. + pub(crate) fn is_valid_action<'a>( + &mut self, + i: usize, + getters: &[&'a dyn GetData<'a>], + ) -> DeltaResult { + // The `||` operator short-circuits the evaluation, so if any of the checks return true, + // the rest will not be evaluated. + Ok(self.check_file_action(i, getters)? + || self.check_txn_action(i, getters[11])? + || self.check_protocol_action(i, getters[10])? + || self.check_metadata_action(i, getters[9])?) + } +} + +impl RowVisitor for CheckpointVisitor<'_> { + fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { + // The data columns visited must be in the following order: + // 1. ADD + // 2. REMOVE + // 3. METADATA + // 4. PROTOCOL + // 5. 
TXN + static NAMES_AND_TYPES: LazyLock = LazyLock::new(|| { + const STRING: DataType = DataType::STRING; + const INTEGER: DataType = DataType::INTEGER; + const LONG: DataType = DataType::LONG; + let types_and_names = vec![ + // File action columns + (STRING, column_name!("add.path")), + (STRING, column_name!("add.deletionVector.storageType")), + (STRING, column_name!("add.deletionVector.pathOrInlineDv")), + (INTEGER, column_name!("add.deletionVector.offset")), + (STRING, column_name!("remove.path")), + (LONG, column_name!("remove.deletionTimestamp")), + (STRING, column_name!("remove.deletionVector.storageType")), + (STRING, column_name!("remove.deletionVector.pathOrInlineDv")), + (INTEGER, column_name!("remove.deletionVector.offset")), + // Non-file action columns + (STRING, column_name!("metaData.id")), + (INTEGER, column_name!("protocol.minReaderVersion")), + (STRING, column_name!("txn.appId")), + ]; + let (types, names) = types_and_names.into_iter().unzip(); + (names, types).into() + }); + NAMES_AND_TYPES.as_ref() + } + + fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()> { + require!( + getters.len() == 12, + Error::InternalError(format!( + "Wrong number of visitor getters: {}", + getters.len() + )) + ); + + for i in 0..row_count { + self.selection_vector[i] = self.is_valid_action(i, getters)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use crate::arrow::array::StringArray; + use crate::utils::test_utils::{action_batch, parse_json_batch}; + + use super::*; + + #[test] + fn test_checkpoint_visitor() -> DeltaResult<()> { + let data = action_batch(); + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 8], + 0, // minimum_file_retention_timestamp (no expired tombstones) + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(data.as_ref())?; + + let expected = vec![ + true, // Row 0 is an add action (included) + true, // Row 1 is a remove action (included) + false, // Row 2 is a commit info action (excluded) + true, // Row 3 is a protocol action (included) + true, // Row 4 is a metadata action (included) + false, // Row 5 is a cdc action (excluded) + false, // Row 6 is a sidecar action (excluded) + true, // Row 7 is a txn action (included) + ]; + + assert_eq!(visitor.file_actions_count, 2); + assert_eq!(visitor.add_actions_count, 1); + assert!(visitor.seen_protocol); + assert!(visitor.seen_metadata); + assert_eq!(visitor.seen_txns.len(), 1); + assert_eq!(visitor.non_file_actions_count, 3); + + assert_eq!(visitor.selection_vector, expected); + Ok(()) + } + + /// Tests the boundary conditions for tombstone expiration logic. 
+ /// Specifically checks: + /// - Remove actions with deletionTimestamp == minimumFileRetentionTimestamp (should be excluded) + /// - Remove actions with deletionTimestamp < minimumFileRetentionTimestamp (should be excluded) + /// - Remove actions with deletionTimestamp > minimumFileRetentionTimestamp (should be included) + /// - Remove actions with missing deletionTimestamp (defaults to 0, should be excluded) + #[test] + fn test_checkpoint_visitor_boundary_cases_for_tombstone_expiration() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"remove":{"path":"exactly_at_threshold","deletionTimestamp":100,"dataChange":true,"partitionValues":{}}}"#, + r#"{"remove":{"path":"one_below_threshold","deletionTimestamp":99,"dataChange":true,"partitionValues":{}}}"#, + r#"{"remove":{"path":"one_above_threshold","deletionTimestamp":101,"dataChange":true,"partitionValues":{}}}"#, + // Missing timestamp defaults to 0 + r#"{"remove":{"path":"missing_timestamp","dataChange":true,"partitionValues":{}}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 4], + 100, // minimum_file_retention_timestamp (threshold set to 100) + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // Only "one_above_threshold" should be kept + let expected = vec![false, false, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 1); + assert_eq!(visitor.add_actions_count, 0); + assert_eq!(visitor.non_file_actions_count, 0); + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_file_actions_in_checkpoint_batch() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"add":{"path":"file1","partitionValues":{"c1":"6","c2":"a"},"size":452,"modificationTime":1670892998137,"dataChange":true}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + false, // is_log_batch = false (checkpoint batch) + vec![true; 1], + 0, + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + let expected = vec![true]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 1); + assert_eq!(visitor.add_actions_count, 1); + assert_eq!(visitor.non_file_actions_count, 0); + // The action should NOT be added to the seen_file_keys set as it's a checkpoint batch + // and actions in checkpoint batches do not conflict with each other. + // This is a key difference from log batches, where actions can conflict. 
+ assert!(seen_file_keys.is_empty()); + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_conflicts_with_deletion_vectors() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + // Add action for file1 with deletion vector + r#"{"add":{"path":"file1","partitionValues":{},"size":635,"modificationTime":100,"dataChange":true,"deletionVector":{"storageType":"two","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + // Remove action for file1 with a different deletion vector + r#"{"remove":{"path":"file1","deletionTimestamp":100,"dataChange":true,"deletionVector":{"storageType":"one","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + // Add action for file1 with the same deletion vector as the remove action above (excluded) + r#"{"add":{"path":"file1","partitionValues":{},"size":635,"modificationTime":100,"dataChange":true,"deletionVector":{"storageType":"one","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 3], + 0, + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + let expected = vec![true, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 2); + assert_eq!(visitor.add_actions_count, 1); + assert_eq!(visitor.non_file_actions_count, 0); + + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_already_seen_non_file_actions() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + ].into(); + let batch = parse_json_batch(json_strings); + + // Pre-populate with txn app1 + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + seen_txns.insert("app1".to_string()); + + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 3], + 0, + true, // The visior has already seen a protocol action + true, // The visitor has already seen a metadata action + &mut seen_txns, // Pre-populated transaction + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // All actions should be skipped as they have already been seen + let expected = vec![false, false, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.non_file_actions_count, 0); + assert_eq!(visitor.file_actions_count, 0); + + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_duplicate_non_file_actions() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, // Duplicate txn + r#"{"txn":{"appId":"app2","version":1,"lastUpdated":123456789}}"#, // Different app ID + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7}}"#, // Duplicate 
protocol + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + // Duplicate metadata + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, // is_log_batch + vec![true; 7], + 0, // minimum_file_retention_timestamp + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // First occurrence of each type should be included + let expected = vec![true, false, true, true, false, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.seen_txns.len(), 2); // Two different app IDs + assert_eq!(visitor.non_file_actions_count, 4); // 2 txns + 1 protocol + 1 metadata + assert_eq!(visitor.file_actions_count, 0); + + Ok(()) + } +} diff --git a/kernel/src/checkpoint/mod.rs b/kernel/src/checkpoint/mod.rs new file mode 100644 index 000000000..e18479696 --- /dev/null +++ b/kernel/src/checkpoint/mod.rs @@ -0,0 +1,8 @@ +//! # Delta Kernel Checkpoint API +//! +//! This module implements the API for writing checkpoints in delta tables. +//! Checkpoints provide a compact summary of the table state, enabling faster recovery by +//! avoiding full log replay. This API supports three checkpoint types: +//! +//! TODO!(seb): Include docs when implemented +mod log_replay; diff --git a/kernel/src/engine/arrow_conversion.rs b/kernel/src/engine/arrow_conversion.rs index 0b905ff3a..a425cd143 100644 --- a/kernel/src/engine/arrow_conversion.rs +++ b/kernel/src/engine/arrow_conversion.rs @@ -2,10 +2,11 @@ use std::sync::Arc; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, }; +use crate::arrow::error::ArrowError; use itertools::Itertools; use crate::error::Error; diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index 50a627e5c..de2dbfbec 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -2,17 +2,23 @@ use crate::engine_data::{EngineData, EngineList, EngineMap, GetData, RowVisitor} use crate::schema::{ColumnName, DataType}; use crate::{DeltaResult, Error}; -use arrow_array::cast::AsArray; -use arrow_array::types::{Int32Type, Int64Type}; -use arrow_array::{ +use crate::arrow::array::cast::AsArray; +use crate::arrow::array::types::{Int32Type, Int64Type}; +use crate::arrow::array::{ Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch, StructArray, }; -use arrow_schema::{DataType as ArrowDataType, FieldRef}; +use crate::arrow::datatypes::{DataType as ArrowDataType, FieldRef}; use tracing::debug; use std::collections::{HashMap, HashSet}; -/// ArrowEngineData holds an Arrow RecordBatch, implements `EngineData` so the kernel can extract from it. 
+pub use crate::engine::arrow_utils::fix_nested_null_masks; + +/// ArrowEngineData holds an Arrow `RecordBatch`, implements `EngineData` so the kernel can extract from it. +/// +/// WARNING: Row visitors require that all leaf columns of the record batch have correctly computed +/// NULL masks. The arrow parquet reader is known to produce incomplete NULL masks, for +/// example. When in doubt, call [`fix_nested_null_masks`] first. pub struct ArrowEngineData { data: RecordBatch, } @@ -43,6 +49,12 @@ impl From for ArrowEngineData { } } +impl From for ArrowEngineData { + fn from(value: StructArray) -> Self { + ArrowEngineData::new(value.into()) + } +} + impl From for RecordBatch { fn from(value: ArrowEngineData) -> Self { value.data @@ -282,31 +294,20 @@ impl ArrowEngineData { #[cfg(test)] mod tests { - use std::sync::Arc; - - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::StringArray; + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::utils::test_utils::string_array_to_engine_data; use crate::{ actions::{get_log_schema, Metadata, Protocol}, engine::sync::SyncEngine, - DeltaResult, Engine, EngineData, + DeltaResult, Engine, }; - use super::ArrowEngineData; - - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) - } - #[test] fn test_md_extract() -> DeltaResult<()> { let engine = SyncEngine::new(); - let handler = engine.get_json_handler(); + let handler = engine.json_handler(); let json_strings: StringArray = vec![ r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"#, ] @@ -325,7 +326,7 @@ mod tests { #[test] fn test_protocol_extract() -> DeltaResult<()> { let engine = SyncEngine::new(); - let handler = engine.get_json_handler(); + let handler = engine.json_handler(); let json_strings: StringArray = vec![ r#"{"protocol": {"minReaderVersion": 3, "minWriterVersion": 7, "readerFeatures": ["rw1"], "writerFeatures": ["rw1", "w2"]}}"#, ] @@ -337,10 +338,13 @@ mod tests { let protocol = Protocol::try_new_from_data(parsed.as_ref())?.unwrap(); assert_eq!(protocol.min_reader_version(), 3); assert_eq!(protocol.min_writer_version(), 7); - assert_eq!(protocol.reader_features(), Some(["rw1".into()].as_slice())); + assert_eq!( + protocol.reader_features(), + Some([ReaderFeature::unknown("rw1")].as_slice()) + ); assert_eq!( protocol.writer_features(), - Some(["rw1".into(), "w2".into()].as_slice()) + Some([WriterFeature::unknown("rw1"), WriterFeature::unknown("w2")].as_slice()) ); Ok(()) } diff --git a/kernel/src/engine/arrow_expression.rs b/kernel/src/engine/arrow_expression.rs deleted file mode 100644 index 8ee54ebd0..000000000 --- a/kernel/src/engine/arrow_expression.rs +++ /dev/null @@ -1,870 +0,0 @@ -//! Expression handling based on arrow-rs compute kernels. 
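
To make the `ArrowEngineData` changes above concrete, here is a small kernel-internal sketch (illustrative only, not part of the patch; `wrap_batch` and `wrap_struct` are made-up names) showing a `RecordBatch` being wrapped for the kernel and the new `From<StructArray>` conversion being used:

```rust
use std::sync::Arc;

use crate::arrow::array::{Int32Array, RecordBatch, StructArray};
use crate::arrow::datatypes::{DataType, Field, Schema};
use crate::engine::arrow_data::ArrowEngineData;
use crate::EngineData;

fn wrap_batch() -> Box<dyn EngineData> {
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, true)]));
    let ids = Int32Array::from(vec![Some(1), None, Some(3)]);
    let batch = RecordBatch::try_new(schema, vec![Arc::new(ids)]).expect("valid batch");
    // If this batch came from the arrow parquet reader, nested NULL masks may be
    // incomplete; per the warning above, run `fix_nested_null_masks` on it first.
    Box::new(ArrowEngineData::new(batch))
}

fn wrap_struct(sa: StructArray) -> ArrowEngineData {
    // Uses the `From<StructArray>` impl added above:
    // StructArray -> RecordBatch -> ArrowEngineData.
    sa.into()
}
```
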
-use std::borrow::Borrow; -use std::collections::HashMap; -use std::sync::Arc; - -use arrow_arith::boolean::{and_kleene, is_null, not, or_kleene}; -use arrow_arith::numeric::{add, div, mul, sub}; -use arrow_array::cast::AsArray; -use arrow_array::{types::*, MapArray}; -use arrow_array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Datum, Decimal128Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, RecordBatch, - StringArray, StructArray, TimestampMicrosecondArray, -}; -use arrow_buffer::OffsetBuffer; -use arrow_ord::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow_ord::comparison::in_list_utf8; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Fields, IntervalUnit, - Schema as ArrowSchema, TimeUnit, -}; -use arrow_select::concat::concat; -use itertools::Itertools; - -use super::arrow_conversion::LIST_ARRAY_ROOT; -use super::arrow_utils::make_arrow_error; -use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::prim_array_cmp; -use crate::engine::ensure_data_types::ensure_data_types; -use crate::error::{DeltaResult, Error}; -use crate::expressions::{ - BinaryExpression, BinaryOperator, Expression, Scalar, UnaryExpression, UnaryOperator, - VariadicExpression, VariadicOperator, -}; -use crate::schema::{ArrayType, DataType, MapType, PrimitiveType, Schema, SchemaRef, StructField}; -use crate::{EngineData, ExpressionEvaluator, ExpressionHandler}; - -// TODO leverage scalars / Datum - -fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { - arr.as_any() - .downcast_ref::() - .ok_or_else(|| Error::generic("expected boolean array")) -} - -impl Scalar { - /// Convert scalar to arrow array. - pub fn to_array(&self, num_rows: usize) -> DeltaResult { - use Scalar::*; - let arr: ArrayRef = match self { - Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), - Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), - Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), - Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), - Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), - Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), - String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), - Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), - Timestamp(val) => { - Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows).with_timezone("UTC")) - } - TimestampNtz(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), - Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), - Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), - Decimal(val, precision, scale) => Arc::new( - Decimal128Array::from_value(*val, num_rows) - .with_precision_and_scale(*precision, *scale as i8)?, - ), - Struct(data) => { - let arrays = data - .values() - .iter() - .map(|val| val.to_array(num_rows)) - .try_collect()?; - let fields: Fields = data - .fields() - .iter() - .map(ArrowField::try_from) - .try_collect()?; - Arc::new(StructArray::try_new(fields, arrays, None)?) 
- } - Array(data) => { - #[allow(deprecated)] - let values = data.array_elements(); - let vecs: Vec<_> = values.iter().map(|v| v.to_array(num_rows)).try_collect()?; - let values: Vec<_> = vecs.iter().map(|x| x.as_ref()).collect(); - let offsets: Vec<_> = vecs.iter().map(|v| v.len()).collect(); - let offset_buffer = OffsetBuffer::from_lengths(offsets); - let field = ArrowField::try_from(data.array_type())?; - Arc::new(ListArray::new( - Arc::new(field), - offset_buffer, - concat(values.as_slice())?, - None, - )) - } - Null(data_type) => match data_type { - DataType::Primitive(primitive) => match primitive { - PrimitiveType::Byte => Arc::new(Int8Array::new_null(num_rows)), - PrimitiveType::Short => Arc::new(Int16Array::new_null(num_rows)), - PrimitiveType::Integer => Arc::new(Int32Array::new_null(num_rows)), - PrimitiveType::Long => Arc::new(Int64Array::new_null(num_rows)), - PrimitiveType::Float => Arc::new(Float32Array::new_null(num_rows)), - PrimitiveType::Double => Arc::new(Float64Array::new_null(num_rows)), - PrimitiveType::String => Arc::new(StringArray::new_null(num_rows)), - PrimitiveType::Boolean => Arc::new(BooleanArray::new_null(num_rows)), - PrimitiveType::Timestamp => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows).with_timezone("UTC")) - } - PrimitiveType::TimestampNtz => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows)) - } - PrimitiveType::Date => Arc::new(Date32Array::new_null(num_rows)), - PrimitiveType::Binary => Arc::new(BinaryArray::new_null(num_rows)), - PrimitiveType::Decimal(precision, scale) => Arc::new( - Decimal128Array::new_null(num_rows) - .with_precision_and_scale(*precision, *scale as i8)?, - ), - }, - DataType::Struct(t) => { - let fields: Fields = t.fields().map(ArrowField::try_from).try_collect()?; - Arc::new(StructArray::new_null(fields, num_rows)) - } - DataType::Array(t) => { - let field = - ArrowField::new(LIST_ARRAY_ROOT, t.element_type().try_into()?, true); - Arc::new(ListArray::new_null(Arc::new(field), num_rows)) - } - DataType::Map { .. } => unimplemented!(), - }, - }; - Ok(arr) - } -} - -fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { - Arc::new(arr) as _ -} - -trait ProvidesColumnByName { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef>; -} - -impl ProvidesColumnByName for RecordBatch { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { - self.column_by_name(name) - } -} - -impl ProvidesColumnByName for StructArray { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { - self.column_by_name(name) - } -} - -// Given a RecordBatch or StructArray, recursively probe for a nested column path and return the -// corresponding column, or Err if the path is invalid. For example, given the following schema: -// ```text -// root: { -// a: int32, -// b: struct { -// c: int32, -// d: struct { -// e: int32, -// f: int64, -// }, -// }, -// } -// ``` -// The path ["b", "d", "f"] would retrieve the int64 column while ["a", "b"] would produce an error. 
-fn extract_column(mut parent: &dyn ProvidesColumnByName, col: &[String]) -> DeltaResult { - let mut field_names = col.iter(); - let Some(mut field_name) = field_names.next() else { - return Err(ArrowError::SchemaError("Empty column path".to_string()))?; - }; - loop { - let child = parent - .column_by_name(field_name) - .ok_or_else(|| ArrowError::SchemaError(format!("No such field: {field_name}")))?; - field_name = match field_names.next() { - Some(name) => name, - None => return Ok(child.clone()), - }; - parent = child - .as_any() - .downcast_ref::() - .ok_or_else(|| ArrowError::SchemaError(format!("Not a struct: {field_name}")))?; - } -} - -fn evaluate_expression( - expression: &Expression, - batch: &RecordBatch, - result_type: Option<&DataType>, -) -> DeltaResult { - use BinaryOperator::*; - use Expression::*; - match (expression, result_type) { - (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), - (Column(name), _) => extract_column(batch, name), - (Struct(fields), Some(DataType::Struct(output_schema))) => { - let columns = fields - .iter() - .zip(output_schema.fields()) - .map(|(expr, field)| evaluate_expression(expr, batch, Some(field.data_type()))); - let output_cols: Vec = columns.try_collect()?; - let output_fields: Vec = output_cols - .iter() - .zip(output_schema.fields()) - .map(|(output_col, output_field)| -> DeltaResult<_> { - Ok(ArrowField::new( - output_field.name(), - output_col.data_type().clone(), - output_col.is_nullable(), - )) - }) - .try_collect()?; - let result = StructArray::try_new(output_fields.into(), output_cols, None)?; - Ok(Arc::new(result)) - } - (Struct(_), _) => Err(Error::generic( - "Data type is required to evaluate struct expressions", - )), - (Unary(UnaryExpression { op, expr }), _) => { - let arr = evaluate_expression(expr.as_ref(), batch, None)?; - Ok(match op { - UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), - UnaryOperator::IsNull => Arc::new(is_null(&arr)?), - }) - } - ( - Binary(BinaryExpression { - op: In, - left, - right, - }), - _, - ) => match (left.as_ref(), right.as_ref()) { - (Literal(_), Column(_)) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - if let Some(string_arr) = left_arr.as_string_opt::() { - if let Some(right_arr) = right_arr.as_list_opt::() { - return in_list_utf8(string_arr, right_arr) - .map(wrap_comparison_result) - .map_err(Error::generic_err); - } - } - prim_array_cmp! 
{ - left_arr, right_arr, - (ArrowDataType::Int8, Int8Type), - (ArrowDataType::Int16, Int16Type), - (ArrowDataType::Int32, Int32Type), - (ArrowDataType::Int64, Int64Type), - (ArrowDataType::UInt8, UInt8Type), - (ArrowDataType::UInt16, UInt16Type), - (ArrowDataType::UInt32, UInt32Type), - (ArrowDataType::UInt64, UInt64Type), - (ArrowDataType::Float16, Float16Type), - (ArrowDataType::Float32, Float32Type), - (ArrowDataType::Float64, Float64Type), - (ArrowDataType::Timestamp(TimeUnit::Second, _), TimestampSecondType), - (ArrowDataType::Timestamp(TimeUnit::Millisecond, _), TimestampMillisecondType), - (ArrowDataType::Timestamp(TimeUnit::Microsecond, _), TimestampMicrosecondType), - (ArrowDataType::Timestamp(TimeUnit::Nanosecond, _), TimestampNanosecondType), - (ArrowDataType::Date32, Date32Type), - (ArrowDataType::Date64, Date64Type), - (ArrowDataType::Time32(TimeUnit::Second), Time32SecondType), - (ArrowDataType::Time32(TimeUnit::Millisecond), Time32MillisecondType), - (ArrowDataType::Time64(TimeUnit::Microsecond), Time64MicrosecondType), - (ArrowDataType::Time64(TimeUnit::Nanosecond), Time64NanosecondType), - (ArrowDataType::Duration(TimeUnit::Second), DurationSecondType), - (ArrowDataType::Duration(TimeUnit::Millisecond), DurationMillisecondType), - (ArrowDataType::Duration(TimeUnit::Microsecond), DurationMicrosecondType), - (ArrowDataType::Duration(TimeUnit::Nanosecond), DurationNanosecondType), - (ArrowDataType::Interval(IntervalUnit::DayTime), IntervalDayTimeType), - (ArrowDataType::Interval(IntervalUnit::YearMonth), IntervalYearMonthType), - (ArrowDataType::Interval(IntervalUnit::MonthDayNano), IntervalMonthDayNanoType), - (ArrowDataType::Decimal128(_, _), Decimal128Type), - (ArrowDataType::Decimal256(_, _), Decimal256Type) - } - } - (Literal(lit), Literal(Scalar::Array(ad))) => { - #[allow(deprecated)] - let exists = ad.array_elements().contains(lit); - Ok(Arc::new(BooleanArray::from(vec![exists]))) - } - (l, r) => Err(Error::invalid_expression(format!( - "Invalid right value for (NOT) IN comparison, left is: {l} right is: {r}" - ))), - }, - ( - Binary(BinaryExpression { - op: NotIn, - left, - right, - }), - _, - ) => { - let reverse_op = Expression::binary(In, *left.clone(), *right.clone()); - let reverse_expr = evaluate_expression(&reverse_op, batch, None)?; - not(reverse_expr.as_boolean()) - .map(wrap_comparison_result) - .map_err(Error::generic_err) - } - (Binary(BinaryExpression { op, left, right }), _) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - - type Operation = fn(&dyn Datum, &dyn Datum) -> Result; - let eval: Operation = match op { - Plus => add, - Minus => sub, - Multiply => mul, - Divide => div, - LessThan => |l, r| lt(l, r).map(wrap_comparison_result), - LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), - GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), - GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), - Equal => |l, r| eq(l, r).map(wrap_comparison_result), - NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), - Distinct => |l, r| distinct(l, r).map(wrap_comparison_result), - // NOTE: [Not]In was already covered above - In | NotIn => return Err(Error::generic("Invalid expression given")), - }; - - eval(&left_arr, &right_arr).map_err(Error::generic_err) - } - (Variadic(VariadicExpression { op, exprs }), None | Some(&DataType::BOOLEAN)) => { - type Operation = fn(&BooleanArray, &BooleanArray) -> Result; - let (reducer, 
default): (Operation, _) = match op { - VariadicOperator::And => (and_kleene, true), - VariadicOperator::Or => (or_kleene, false), - }; - exprs - .iter() - .map(|expr| evaluate_expression(expr, batch, result_type)) - .reduce(|l, r| { - Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) - .map(wrap_comparison_result)?) - }) - .unwrap_or_else(|| { - evaluate_expression(&Expression::literal(default), batch, result_type) - }) - } - (Variadic(_), _) => { - // NOTE: Update this error message if we add support for variadic operations on other types - Err(Error::Generic(format!( - "Variadic {expression:?} is expected to return boolean results, got {result_type:?}" - ))) - } - } -} - -// Apply a schema to an array. The array _must_ be a `StructArray`. Returns a `RecordBatch where the -// names of fields, nullable, and metadata in the struct have been transformed to match those in -// schema specified by `schema` -fn apply_schema(array: &dyn Array, schema: &DataType) -> DeltaResult { - let DataType::Struct(struct_schema) = schema else { - return Err(Error::generic( - "apply_schema at top-level must be passed a struct schema", - )); - }; - let applied = apply_schema_to_struct(array, struct_schema)?; - Ok(applied.into()) -} - -// helper to transform an arrow field+col into the specified target type. If `rename` is specified -// the field will be renamed to the contained `str`. -fn new_field_with_metadata( - field_name: &str, - data_type: &ArrowDataType, - nullable: bool, - metadata: Option>, -) -> ArrowField { - let mut field = ArrowField::new(field_name, data_type.clone(), nullable); - if let Some(metadata) = metadata { - field.set_metadata(metadata); - }; - field -} - -// A helper that is a wrapper over `transform_field_and_col`. This will take apart the passed struct -// and use that method to transform each column and then put the struct back together. Target types -// and names for each column should be passed in `target_types_and_names`. The number of elements in -// the `target_types_and_names` iterator _must_ be the same as the number of columns in -// `struct_array`. The transformation is ordinal. That is, the order of fields in `target_fields` -// _must_ match the order of the columns in `struct_array`. -fn transform_struct( - struct_array: &StructArray, - target_fields: impl Iterator>, -) -> DeltaResult { - let (_, arrow_cols, nulls) = struct_array.clone().into_parts(); - let input_col_count = arrow_cols.len(); - let result_iter = - arrow_cols - .into_iter() - .zip(target_fields) - .map(|(sa_col, target_field)| -> DeltaResult<_> { - let target_field = target_field.borrow(); - let transformed_col = apply_schema_to(&sa_col, target_field.data_type())?; - let transformed_field = new_field_with_metadata( - &target_field.name, - transformed_col.data_type(), - target_field.nullable, - Some(target_field.metadata_with_string_values()), - ); - Ok((transformed_field, transformed_col)) - }); - let (transformed_fields, transformed_cols): (Vec, Vec) = - result_iter.process_results(|iter| iter.unzip())?; - if transformed_cols.len() != input_col_count { - return Err(Error::InternalError(format!( - "Passed struct had {input_col_count} columns, but transformed column has {}", - transformed_cols.len() - ))); - } - Ok(StructArray::try_new( - transformed_fields.into(), - transformed_cols, - nulls, - )?) -} - -// Transform a struct array. The data is in `array`, and the target fields are in `kernel_fields`. 
-fn apply_schema_to_struct(array: &dyn Array, kernel_fields: &Schema) -> DeltaResult { - let Some(sa) = array.as_struct_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a struct but isn't a StructArray", - )); - }; - transform_struct(sa, kernel_fields.fields()) -} - -// deconstruct the array, then rebuild the mapped version -fn apply_schema_to_list( - array: &dyn Array, - target_inner_type: &ArrayType, -) -> DeltaResult { - let Some(la) = array.as_list_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a list but isn't a ListArray", - )); - }; - let (field, offset_buffer, values, nulls) = la.clone().into_parts(); - - let transformed_values = apply_schema_to(&values, &target_inner_type.element_type)?; - let transformed_field = ArrowField::new( - field.name(), - transformed_values.data_type().clone(), - target_inner_type.contains_null, - ); - Ok(ListArray::try_new( - Arc::new(transformed_field), - offset_buffer, - transformed_values, - nulls, - )?) -} - -// deconstruct a map, and rebuild it with the specified target kernel type -fn apply_schema_to_map(array: &dyn Array, kernel_map_type: &MapType) -> DeltaResult { - let Some(ma) = array.as_map_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a map but isn't a MapArray", - )); - }; - let (map_field, offset_buffer, map_struct_array, nulls, ordered) = ma.clone().into_parts(); - let target_fields = map_struct_array - .fields() - .iter() - .zip([&kernel_map_type.key_type, &kernel_map_type.value_type]) - .zip([false, kernel_map_type.value_contains_null]) - .map(|((arrow_field, target_type), nullable)| { - StructField::new(arrow_field.name(), target_type.clone(), nullable) - }); - - // Arrow puts the key type/val as the first field/col and the value type/val as the second. So - // we just transform like a 'normal' struct, but we know there are two fields/cols and we - // specify the key/value types as the target type iterator. - let transformed_map_struct_array = transform_struct(&map_struct_array, target_fields)?; - - let transformed_map_field = ArrowField::new( - map_field.name().clone(), - transformed_map_struct_array.data_type().clone(), - map_field.is_nullable(), - ); - Ok(MapArray::try_new( - Arc::new(transformed_map_field), - offset_buffer, - transformed_map_struct_array, - nulls, - ordered, - )?) -} - -// apply `schema` to `array`. This handles renaming, and adjusting nullability and metadata. 
if the -// actual data types don't match, this will return an error -fn apply_schema_to(array: &ArrayRef, schema: &DataType) -> DeltaResult { - use DataType::*; - let array: ArrayRef = match schema { - Struct(stype) => Arc::new(apply_schema_to_struct(array, stype)?), - Array(atype) => Arc::new(apply_schema_to_list(array, atype)?), - Map(mtype) => Arc::new(apply_schema_to_map(array, mtype)?), - _ => { - ensure_data_types(schema, array.data_type(), true)?; - array.clone() - } - }; - Ok(array) -} - -#[derive(Debug)] -pub struct ArrowExpressionHandler; - -impl ExpressionHandler for ArrowExpressionHandler { - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc { - Arc::new(DefaultExpressionEvaluator { - input_schema: schema, - expression: Box::new(expression), - output_type, - }) - } -} - -#[derive(Debug)] -pub struct DefaultExpressionEvaluator { - input_schema: SchemaRef, - expression: Box, - output_type: DataType, -} - -impl ExpressionEvaluator for DefaultExpressionEvaluator { - fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult> { - let batch = batch - .any_ref() - .downcast_ref::() - .ok_or_else(|| Error::engine_data_type("ArrowEngineData"))? - .record_batch(); - let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; - // TODO: make sure we have matching schemas for validation - // if batch.schema().as_ref() != &input_schema { - // return Err(Error::Generic(format!( - // "input schema does not match batch schema: {:?} != {:?}", - // input_schema, - // batch.schema() - // ))); - // }; - let array_ref = evaluate_expression(&self.expression, batch, Some(&self.output_type))?; - let batch: RecordBatch = if let DataType::Struct(_) = self.output_type { - apply_schema(&array_ref, &self.output_type)? - } else { - let array_ref = apply_schema_to(&array_ref, &self.output_type)?; - let arrow_type: ArrowDataType = ArrowDataType::try_from(&self.output_type)?; - let schema = ArrowSchema::new(vec![ArrowField::new("output", arrow_type, true)]); - RecordBatch::try_new(Arc::new(schema), vec![array_ref])? 
- }; - Ok(Box::new(ArrowEngineData::new(batch))) - } -} - -#[cfg(test)] -mod tests { - use std::ops::{Add, Div, Mul, Sub}; - - use arrow_array::{GenericStringArray, Int32Array}; - use arrow_buffer::ScalarBuffer; - use arrow_schema::{DataType, Field, Fields, Schema}; - - use super::*; - use crate::expressions::*; - use crate::schema::ArrayType; - use crate::DataType as DeltaDataTypes; - - #[test] - fn test_array_column() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - - let schema = Schema::new([arr_field.clone()]); - - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let not_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); - - let in_op = Expression::binary(BinaryOperator::In, 5, column_expr!("item")); - - let result = evaluate_expression(¬_op, &batch, None).unwrap(); - let expected = BooleanArray::from(vec![true, false, true]); - assert_eq!(result.as_ref(), &expected); - - let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![false, true, false]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_bad_right_type_array() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let schema = Schema::new([field.clone()]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values.clone())]).unwrap(); - - let in_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); - - let in_result = evaluate_expression(&in_op, &batch, None); - - assert!(in_result.is_err()); - assert_eq!( - in_result.unwrap_err().to_string(), - "Invalid expression evaluation: Cannot cast to list array: Int32" - ); - } - - #[test] - fn test_literal_type_array() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let schema = Schema::new([field.clone()]); - let batch = RecordBatch::new_empty(Arc::new(schema)); - - let in_op = Expression::binary( - BinaryOperator::NotIn, - 5, - Scalar::Array(ArrayData::new( - ArrayType::new(DeltaDataTypes::INTEGER, false), - vec![Scalar::Integer(1), Scalar::Integer(2)], - )), - ); - - let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![true]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_invalid_array_sides() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - - let schema = Schema::new([arr_field.clone()]); - - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let in_op = Expression::binary( - BinaryOperator::NotIn, - column_expr!("item"), - column_expr!("item"), - ); - - let in_result = evaluate_expression(&in_op, &batch, None); - - assert!(in_result.is_err()); - assert_eq!( - in_result.unwrap_err().to_string(), - "Invalid expression evaluation: 
Invalid right value for (NOT) IN comparison, left is: Column(item) right is: Column(item)".to_string() - ) - } - - #[test] - fn test_str_arrays() { - let values = GenericStringArray::::from(vec![ - "hi", "bye", "hi", "hi", "bye", "bye", "hi", "bye", "hi", - ]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Utf8, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - let schema = Schema::new([arr_field.clone()]); - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let str_not_op = Expression::binary(BinaryOperator::NotIn, "bye", column_expr!("item")); - - let str_in_op = Expression::binary(BinaryOperator::In, "hi", column_expr!("item")); - - let result = evaluate_expression(&str_in_op, &batch, None).unwrap(); - let expected = BooleanArray::from(vec![true, true, true]); - assert_eq!(result.as_ref(), &expected); - - let in_result = evaluate_expression(&str_not_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![false, false, false]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_extract_column() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); - let column = column_expr!("a"); - - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - - let schema = Schema::new(vec![Field::new( - "b", - DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), - false, - )]); - - let struct_values: ArrayRef = Arc::new(values.clone()); - let struct_array = StructArray::from(vec![( - Arc::new(Field::new("a", DataType::Int32, false)), - struct_values, - )]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(struct_array.clone())], - ) - .unwrap(); - let column = column_expr!("b.a"); - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - } - - #[test] - fn test_binary_op_scalar() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = column_expr!("a"); - - let expression = column.clone().add(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().sub(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().mul(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - // TODO handle type casting - let expression = column.div(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); - assert_eq!(results.as_ref(), expected.as_ref()) - } - - #[test] - fn test_binary_op() { - let 
schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - ]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(values.clone()), Arc::new(values)], - ) - .unwrap(); - let column_a = column_expr!("a"); - let column_b = column_expr!("b"); - - let expression = column_a.clone().add(column_b.clone()); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().sub(column_b.clone()); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 0, 0])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().mul(column_b); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_binary_cmp() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = column_expr!("a"); - - let expression = column.clone().lt(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().lt_eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().gt(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().gt_eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().ne(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_logical() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Boolean, false), - Field::new("b", DataType::Boolean, false), - ]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![ - Arc::new(BooleanArray::from(vec![true, false])), - Arc::new(BooleanArray::from(vec![false, true])), - ], - ) - .unwrap(); - let column_a = column_expr!("a"); - let column_b = column_expr!("b"); - - let expression = column_a.clone().and(column_b.clone()); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false])); - assert_eq!(results.as_ref(), 
expected.as_ref()); - - let expression = column_a.clone().and(true); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().or(column_b); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().or(false); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - } -} diff --git a/kernel/src/engine/arrow_expression/apply_schema.rs b/kernel/src/engine/arrow_expression/apply_schema.rs new file mode 100644 index 000000000..68fbb1438 --- /dev/null +++ b/kernel/src/engine/arrow_expression/apply_schema.rs @@ -0,0 +1,185 @@ +use std::borrow::Borrow; +use std::collections::HashMap; +use std::sync::Arc; + +use itertools::Itertools; + +use crate::arrow::array::{ + Array, ArrayRef, AsArray, ListArray, MapArray, RecordBatch, StructArray, +}; +use crate::arrow::datatypes::Schema as ArrowSchema; +use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; + +use super::super::arrow_utils::make_arrow_error; +use crate::engine::ensure_data_types::ensure_data_types; +use crate::error::{DeltaResult, Error}; +use crate::schema::{ArrayType, DataType, MapType, Schema, StructField}; + +// Apply a schema to an array. The array _must_ be a `StructArray`. Returns a `RecordBatch where the +// names of fields, nullable, and metadata in the struct have been transformed to match those in +// schema specified by `schema` +pub(crate) fn apply_schema(array: &dyn Array, schema: &DataType) -> DeltaResult { + let DataType::Struct(struct_schema) = schema else { + return Err(Error::generic( + "apply_schema at top-level must be passed a struct schema", + )); + }; + let applied = apply_schema_to_struct(array, struct_schema)?; + let (fields, columns, nulls) = applied.into_parts(); + if let Some(nulls) = nulls { + if nulls.null_count() != 0 { + return Err(Error::invalid_struct_data( + "Top-level nulls in struct are not supported", + )); + } + } + Ok(RecordBatch::try_new( + Arc::new(ArrowSchema::new(fields)), + columns, + )?) +} + +// helper to transform an arrow field+col into the specified target type. If `rename` is specified +// the field will be renamed to the contained `str`. +fn new_field_with_metadata( + field_name: &str, + data_type: &ArrowDataType, + nullable: bool, + metadata: Option>, +) -> ArrowField { + let mut field = ArrowField::new(field_name, data_type.clone(), nullable); + if let Some(metadata) = metadata { + field.set_metadata(metadata); + }; + field +} + +// A helper that is a wrapper over `transform_field_and_col`. This will take apart the passed struct +// and use that method to transform each column and then put the struct back together. Target types +// and names for each column should be passed in `target_types_and_names`. The number of elements in +// the `target_types_and_names` iterator _must_ be the same as the number of columns in +// `struct_array`. The transformation is ordinal. That is, the order of fields in `target_fields` +// _must_ match the order of the columns in `struct_array`. 
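
The ordinal pairing described above can be pictured on plain arrow types. A hedged sketch of the same idea (not part of the patch; `rename_ordinally` is a made-up name), before `transform_struct` below adds the recursive `apply_schema_to` call and metadata handling:

```rust
use crate::arrow::array::{Array, StructArray};
use crate::arrow::datatypes::Field;

/// Illustrative only: pair column i with target field i; data stays, the name and
/// nullability come from the target field.
fn rename_ordinally(input: &StructArray, target: &[Field]) -> StructArray {
    assert_eq!(
        input.num_columns(),
        target.len(),
        "ordinal mapping needs equal counts"
    );
    let (_, columns, nulls) = input.clone().into_parts();
    let fields: Vec<Field> = target
        .iter()
        .zip(&columns)
        .map(|(f, col)| Field::new(f.name(), col.data_type().clone(), f.is_nullable()))
        .collect();
    StructArray::try_new(fields.into(), columns, nulls).expect("lengths checked above")
}
```
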
+fn transform_struct( + struct_array: &StructArray, + target_fields: impl Iterator>, +) -> DeltaResult { + let (_, arrow_cols, nulls) = struct_array.clone().into_parts(); + let input_col_count = arrow_cols.len(); + let result_iter = + arrow_cols + .into_iter() + .zip(target_fields) + .map(|(sa_col, target_field)| -> DeltaResult<_> { + let target_field = target_field.borrow(); + let transformed_col = apply_schema_to(&sa_col, target_field.data_type())?; + let transformed_field = new_field_with_metadata( + &target_field.name, + transformed_col.data_type(), + target_field.nullable, + Some(target_field.metadata_with_string_values()), + ); + Ok((transformed_field, transformed_col)) + }); + let (transformed_fields, transformed_cols): (Vec, Vec) = + result_iter.process_results(|iter| iter.unzip())?; + if transformed_cols.len() != input_col_count { + return Err(Error::InternalError(format!( + "Passed struct had {input_col_count} columns, but transformed column has {}", + transformed_cols.len() + ))); + } + Ok(StructArray::try_new( + transformed_fields.into(), + transformed_cols, + nulls, + )?) +} + +// Transform a struct array. The data is in `array`, and the target fields are in `kernel_fields`. +fn apply_schema_to_struct(array: &dyn Array, kernel_fields: &Schema) -> DeltaResult { + let Some(sa) = array.as_struct_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a struct but isn't a StructArray", + )); + }; + transform_struct(sa, kernel_fields.fields()) +} + +// deconstruct the array, then rebuild the mapped version +fn apply_schema_to_list( + array: &dyn Array, + target_inner_type: &ArrayType, +) -> DeltaResult { + let Some(la) = array.as_list_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a list but isn't a ListArray", + )); + }; + let (field, offset_buffer, values, nulls) = la.clone().into_parts(); + + let transformed_values = apply_schema_to(&values, &target_inner_type.element_type)?; + let transformed_field = ArrowField::new( + field.name(), + transformed_values.data_type().clone(), + target_inner_type.contains_null, + ); + Ok(ListArray::try_new( + Arc::new(transformed_field), + offset_buffer, + transformed_values, + nulls, + )?) +} + +// deconstruct a map, and rebuild it with the specified target kernel type +fn apply_schema_to_map(array: &dyn Array, kernel_map_type: &MapType) -> DeltaResult { + let Some(ma) = array.as_map_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a map but isn't a MapArray", + )); + }; + let (map_field, offset_buffer, map_struct_array, nulls, ordered) = ma.clone().into_parts(); + let target_fields = map_struct_array + .fields() + .iter() + .zip([&kernel_map_type.key_type, &kernel_map_type.value_type]) + .zip([false, kernel_map_type.value_contains_null]) + .map(|((arrow_field, target_type), nullable)| { + StructField::new(arrow_field.name(), target_type.clone(), nullable) + }); + + // Arrow puts the key type/val as the first field/col and the value type/val as the second. So + // we just transform like a 'normal' struct, but we know there are two fields/cols and we + // specify the key/value types as the target type iterator. + let transformed_map_struct_array = transform_struct(&map_struct_array, target_fields)?; + + let transformed_map_field = ArrowField::new( + map_field.name().clone(), + transformed_map_struct_array.data_type().clone(), + map_field.is_nullable(), + ); + Ok(MapArray::try_new( + Arc::new(transformed_map_field), + offset_buffer, + transformed_map_struct_array, + nulls, + ordered, + )?) 
+} + +// apply `schema` to `array`. This handles renaming, and adjusting nullability and metadata. if the +// actual data types don't match, this will return an error +pub(crate) fn apply_schema_to(array: &ArrayRef, schema: &DataType) -> DeltaResult { + use DataType::*; + let array: ArrayRef = match schema { + Struct(stype) => Arc::new(apply_schema_to_struct(array, stype)?), + Array(atype) => Arc::new(apply_schema_to_list(array, atype)?), + Map(mtype) => Arc::new(apply_schema_to_map(array, mtype)?), + _ => { + ensure_data_types(schema, array.data_type(), true)?; + array.clone() + } + }; + Ok(array) +} diff --git a/kernel/src/engine/arrow_expression/evaluate_expression.rs b/kernel/src/engine/arrow_expression/evaluate_expression.rs new file mode 100644 index 000000000..3a2876f78 --- /dev/null +++ b/kernel/src/engine/arrow_expression/evaluate_expression.rs @@ -0,0 +1,247 @@ +//! Expression handling based on arrow-rs compute kernels. +use crate::arrow::array::types::*; +use crate::arrow::array::{ + Array, ArrayRef, AsArray, BooleanArray, Datum, RecordBatch, StructArray, +}; +use crate::arrow::compute::kernels::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; +use crate::arrow::compute::kernels::comparison::in_list_utf8; +use crate::arrow::compute::kernels::numeric::{add, div, mul, sub}; +use crate::arrow::compute::{and_kleene, is_null, not, or_kleene}; +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, IntervalUnit, TimeUnit, +}; +use crate::arrow::error::ArrowError; +use crate::engine::arrow_utils::prim_array_cmp; +use crate::error::{DeltaResult, Error}; +use crate::expressions::{ + BinaryExpression, BinaryOperator, Expression, Scalar, UnaryExpression, UnaryOperator, + VariadicExpression, VariadicOperator, +}; +use crate::schema::DataType; +use itertools::Itertools; +use std::sync::Arc; + +fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { + arr.as_any() + .downcast_ref::() + .ok_or_else(|| Error::generic("expected boolean array")) +} + +fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { + Arc::new(arr) as _ +} + +trait ProvidesColumnByName { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef>; +} + +impl ProvidesColumnByName for RecordBatch { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { + self.column_by_name(name) + } +} + +impl ProvidesColumnByName for StructArray { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { + self.column_by_name(name) + } +} + +// Given a RecordBatch or StructArray, recursively probe for a nested column path and return the +// corresponding column, or Err if the path is invalid. For example, given the following schema: +// ```text +// root: { +// a: int32, +// b: struct { +// c: int32, +// d: struct { +// e: int32, +// f: int64, +// }, +// }, +// } +// ``` +// The path ["b", "d", "f"] would retrieve the int64 column while ["a", "b"] would produce an error. 
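
As a rough illustration of the path walk described above (not part of the patch; `get_nested` and the field names are made up), the same recursion can be written directly against a `RecordBatch`, returning `None` where `extract_column` below reports a `SchemaError`:

```rust
use std::sync::Arc;

use crate::arrow::array::{Array, ArrayRef, AsArray, Int64Array, RecordBatch, StructArray};
use crate::arrow::datatypes::{DataType, Field, Fields, Schema};

/// Walk a nested path through struct columns; None if any step is missing or not a struct.
fn get_nested(batch: &RecordBatch, path: &[&str]) -> Option<ArrayRef> {
    let (first, rest) = path.split_first()?;
    let mut col = batch.column_by_name(first)?.clone();
    for name in rest {
        col = col.as_struct_opt()?.column_by_name(name)?.clone();
    }
    Some(col)
}

fn demo() {
    // root: { b: struct { f: int64 } } -- mirrors the ["b", ..., "f"] example above.
    let f = Arc::new(Int64Array::from(vec![10, 20])) as ArrayRef;
    let b = StructArray::new(
        Fields::from(vec![Field::new("f", DataType::Int64, false)]),
        vec![f],
        None,
    );
    let schema = Arc::new(Schema::new(vec![Field::new("b", b.data_type().clone(), false)]));
    let batch = RecordBatch::try_new(schema, vec![Arc::new(b)]).unwrap();
    assert_eq!(get_nested(&batch, &["b", "f"]).unwrap().len(), 2);
}
```
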
+fn extract_column(mut parent: &dyn ProvidesColumnByName, col: &[String]) -> DeltaResult { + let mut field_names = col.iter(); + let Some(mut field_name) = field_names.next() else { + return Err(ArrowError::SchemaError("Empty column path".to_string()))?; + }; + loop { + let child = parent + .column_by_name(field_name) + .ok_or_else(|| ArrowError::SchemaError(format!("No such field: {field_name}")))?; + field_name = match field_names.next() { + Some(name) => name, + None => return Ok(child.clone()), + }; + parent = child + .as_any() + .downcast_ref::() + .ok_or_else(|| ArrowError::SchemaError(format!("Not a struct: {field_name}")))?; + } +} + +pub(crate) fn evaluate_expression( + expression: &Expression, + batch: &RecordBatch, + result_type: Option<&DataType>, +) -> DeltaResult { + use BinaryOperator::*; + use Expression::*; + match (expression, result_type) { + (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), + (Column(name), _) => extract_column(batch, name), + (Struct(fields), Some(DataType::Struct(output_schema))) => { + let columns = fields + .iter() + .zip(output_schema.fields()) + .map(|(expr, field)| evaluate_expression(expr, batch, Some(field.data_type()))); + let output_cols: Vec = columns.try_collect()?; + let output_fields: Vec = output_cols + .iter() + .zip(output_schema.fields()) + .map(|(output_col, output_field)| -> DeltaResult<_> { + Ok(ArrowField::new( + output_field.name(), + output_col.data_type().clone(), + output_col.is_nullable(), + )) + }) + .try_collect()?; + let result = StructArray::try_new(output_fields.into(), output_cols, None)?; + Ok(Arc::new(result)) + } + (Struct(_), _) => Err(Error::generic( + "Data type is required to evaluate struct expressions", + )), + (Unary(UnaryExpression { op, expr }), _) => { + let arr = evaluate_expression(expr.as_ref(), batch, None)?; + Ok(match op { + UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), + UnaryOperator::IsNull => Arc::new(is_null(&arr)?), + }) + } + ( + Binary(BinaryExpression { + op: In, + left, + right, + }), + _, + ) => match (left.as_ref(), right.as_ref()) { + (Literal(_), Column(_)) => { + let left_arr = evaluate_expression(left.as_ref(), batch, None)?; + let right_arr = evaluate_expression(right.as_ref(), batch, None)?; + if let Some(string_arr) = left_arr.as_string_opt::() { + if let Some(right_arr) = right_arr.as_list_opt::() { + return in_list_utf8(string_arr, right_arr) + .map(wrap_comparison_result) + .map_err(Error::generic_err); + } + } + prim_array_cmp! 
{ + left_arr, right_arr, + (ArrowDataType::Int8, Int8Type), + (ArrowDataType::Int16, Int16Type), + (ArrowDataType::Int32, Int32Type), + (ArrowDataType::Int64, Int64Type), + (ArrowDataType::UInt8, UInt8Type), + (ArrowDataType::UInt16, UInt16Type), + (ArrowDataType::UInt32, UInt32Type), + (ArrowDataType::UInt64, UInt64Type), + (ArrowDataType::Float16, Float16Type), + (ArrowDataType::Float32, Float32Type), + (ArrowDataType::Float64, Float64Type), + (ArrowDataType::Timestamp(TimeUnit::Second, _), TimestampSecondType), + (ArrowDataType::Timestamp(TimeUnit::Millisecond, _), TimestampMillisecondType), + (ArrowDataType::Timestamp(TimeUnit::Microsecond, _), TimestampMicrosecondType), + (ArrowDataType::Timestamp(TimeUnit::Nanosecond, _), TimestampNanosecondType), + (ArrowDataType::Date32, Date32Type), + (ArrowDataType::Date64, Date64Type), + (ArrowDataType::Time32(TimeUnit::Second), Time32SecondType), + (ArrowDataType::Time32(TimeUnit::Millisecond), Time32MillisecondType), + (ArrowDataType::Time64(TimeUnit::Microsecond), Time64MicrosecondType), + (ArrowDataType::Time64(TimeUnit::Nanosecond), Time64NanosecondType), + (ArrowDataType::Duration(TimeUnit::Second), DurationSecondType), + (ArrowDataType::Duration(TimeUnit::Millisecond), DurationMillisecondType), + (ArrowDataType::Duration(TimeUnit::Microsecond), DurationMicrosecondType), + (ArrowDataType::Duration(TimeUnit::Nanosecond), DurationNanosecondType), + (ArrowDataType::Interval(IntervalUnit::DayTime), IntervalDayTimeType), + (ArrowDataType::Interval(IntervalUnit::YearMonth), IntervalYearMonthType), + (ArrowDataType::Interval(IntervalUnit::MonthDayNano), IntervalMonthDayNanoType), + (ArrowDataType::Decimal128(_, _), Decimal128Type), + (ArrowDataType::Decimal256(_, _), Decimal256Type) + } + } + (Literal(lit), Literal(Scalar::Array(ad))) => { + #[allow(deprecated)] + let exists = ad.array_elements().contains(lit); + Ok(Arc::new(BooleanArray::from(vec![exists]))) + } + (l, r) => Err(Error::invalid_expression(format!( + "Invalid right value for (NOT) IN comparison, left is: {l} right is: {r}" + ))), + }, + ( + Binary(BinaryExpression { + op: NotIn, + left, + right, + }), + _, + ) => { + let reverse_op = Expression::binary(In, *left.clone(), *right.clone()); + let reverse_expr = evaluate_expression(&reverse_op, batch, None)?; + not(reverse_expr.as_boolean()) + .map(wrap_comparison_result) + .map_err(Error::generic_err) + } + (Binary(BinaryExpression { op, left, right }), _) => { + let left_arr = evaluate_expression(left.as_ref(), batch, None)?; + let right_arr = evaluate_expression(right.as_ref(), batch, None)?; + + type Operation = fn(&dyn Datum, &dyn Datum) -> Result; + let eval: Operation = match op { + Plus => add, + Minus => sub, + Multiply => mul, + Divide => div, + LessThan => |l, r| lt(l, r).map(wrap_comparison_result), + LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), + GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), + GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), + Equal => |l, r| eq(l, r).map(wrap_comparison_result), + NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), + Distinct => |l, r| distinct(l, r).map(wrap_comparison_result), + // NOTE: [Not]In was already covered above + In | NotIn => return Err(Error::generic("Invalid expression given")), + }; + + eval(&left_arr, &right_arr).map_err(Error::generic_err) + } + (Variadic(VariadicExpression { op, exprs }), None | Some(&DataType::BOOLEAN)) => { + type Operation = fn(&BooleanArray, &BooleanArray) -> Result; + let (reducer, 
default): (Operation, _) = match op { + VariadicOperator::And => (and_kleene, true), + VariadicOperator::Or => (or_kleene, false), + }; + exprs + .iter() + .map(|expr| evaluate_expression(expr, batch, result_type)) + .reduce(|l, r| { + Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) + .map(wrap_comparison_result)?) + }) + .unwrap_or_else(|| { + evaluate_expression(&Expression::literal(default), batch, result_type) + }) + } + (Variadic(_), _) => { + // NOTE: Update this error message if we add support for variadic operations on other types + Err(Error::Generic(format!( + "Variadic {expression:?} is expected to return boolean results, got {result_type:?}" + ))) + } + } +} diff --git a/kernel/src/engine/arrow_expression/mod.rs b/kernel/src/engine/arrow_expression/mod.rs new file mode 100644 index 000000000..019531931 --- /dev/null +++ b/kernel/src/engine/arrow_expression/mod.rs @@ -0,0 +1,194 @@ +//! Expression handling based on arrow-rs compute kernels. +use std::sync::Arc; + +use crate::arrow::array::{ + Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, RecordBatch, + StringArray, StructArray, TimestampMicrosecondArray, +}; +use crate::arrow::buffer::OffsetBuffer; +use crate::arrow::compute::concat; +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, +}; + +use super::arrow_conversion::LIST_ARRAY_ROOT; +use crate::engine::arrow_data::ArrowEngineData; +use crate::error::{DeltaResult, Error}; +use crate::expressions::{Expression, Scalar}; +use crate::schema::{DataType, PrimitiveType, SchemaRef}; +use crate::{EngineData, EvaluationHandler, ExpressionEvaluator}; + +use itertools::Itertools; +use tracing::debug; + +use apply_schema::{apply_schema, apply_schema_to}; +use evaluate_expression::evaluate_expression; + +mod apply_schema; +mod evaluate_expression; + +#[cfg(test)] +mod tests; + +// TODO leverage scalars / Datum + +impl Scalar { + /// Convert scalar to arrow array. + pub fn to_array(&self, num_rows: usize) -> DeltaResult { + use Scalar::*; + let arr: ArrayRef = match self { + Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), + Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), + Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), + Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), + Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), + Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), + String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), + Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), + Timestamp(val) => { + Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows).with_timezone("UTC")) + } + TimestampNtz(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), + Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), + Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), + Decimal(val, precision, scale) => Arc::new( + Decimal128Array::from_value(*val, num_rows) + .with_precision_and_scale(*precision, *scale as i8)?, + ), + Struct(data) => { + let arrays = data + .values() + .iter() + .map(|val| val.to_array(num_rows)) + .try_collect()?; + let fields: Fields = data + .fields() + .iter() + .map(ArrowField::try_from) + .try_collect()?; + Arc::new(StructArray::try_new(fields, arrays, None)?) 
+ } + Array(data) => { + #[allow(deprecated)] + let values = data.array_elements(); + let vecs: Vec<_> = values.iter().map(|v| v.to_array(num_rows)).try_collect()?; + let values: Vec<_> = vecs.iter().map(|x| x.as_ref()).collect(); + let offsets: Vec<_> = vecs.iter().map(|v| v.len()).collect(); + let offset_buffer = OffsetBuffer::from_lengths(offsets); + let field = ArrowField::try_from(data.array_type())?; + Arc::new(ListArray::new( + Arc::new(field), + offset_buffer, + concat(values.as_slice())?, + None, + )) + } + Null(DataType::BYTE) => Arc::new(Int8Array::new_null(num_rows)), + Null(DataType::SHORT) => Arc::new(Int16Array::new_null(num_rows)), + Null(DataType::INTEGER) => Arc::new(Int32Array::new_null(num_rows)), + Null(DataType::LONG) => Arc::new(Int64Array::new_null(num_rows)), + Null(DataType::FLOAT) => Arc::new(Float32Array::new_null(num_rows)), + Null(DataType::DOUBLE) => Arc::new(Float64Array::new_null(num_rows)), + Null(DataType::STRING) => Arc::new(StringArray::new_null(num_rows)), + Null(DataType::BOOLEAN) => Arc::new(BooleanArray::new_null(num_rows)), + Null(DataType::TIMESTAMP) => { + Arc::new(TimestampMicrosecondArray::new_null(num_rows).with_timezone("UTC")) + } + Null(DataType::TIMESTAMP_NTZ) => { + Arc::new(TimestampMicrosecondArray::new_null(num_rows)) + } + Null(DataType::DATE) => Arc::new(Date32Array::new_null(num_rows)), + Null(DataType::BINARY) => Arc::new(BinaryArray::new_null(num_rows)), + Null(DataType::Primitive(PrimitiveType::Decimal(precision, scale))) => Arc::new( + Decimal128Array::new_null(num_rows) + .with_precision_and_scale(*precision, *scale as i8)?, + ), + Null(DataType::Struct(t)) => { + let fields: Fields = t.fields().map(ArrowField::try_from).try_collect()?; + Arc::new(StructArray::new_null(fields, num_rows)) + } + Null(DataType::Array(t)) => { + let field = ArrowField::new(LIST_ARRAY_ROOT, t.element_type().try_into()?, true); + Arc::new(ListArray::new_null(Arc::new(field), num_rows)) + } + Null(DataType::Map { .. }) => { + return Err(Error::unsupported( + "Scalar::to_array does not yet support Map types", + )); + } + }; + Ok(arr) + } +} + +#[derive(Debug)] +pub struct ArrowEvaluationHandler; + +impl EvaluationHandler for ArrowEvaluationHandler { + fn new_expression_evaluator( + &self, + schema: SchemaRef, + expression: Expression, + output_type: DataType, + ) -> Arc { + Arc::new(DefaultExpressionEvaluator { + input_schema: schema, + expression: Box::new(expression), + output_type, + }) + } + + /// Create a single-row array with all-null leaf values. Note that if a nested struct is + /// included in the `output_type`, the entire struct will be NULL (instead of a not-null struct + /// with NULL fields). + fn null_row(&self, output_schema: SchemaRef) -> DeltaResult> { + let fields = output_schema.fields(); + let arrays = fields + .map(|field| Scalar::Null(field.data_type().clone()).to_array(1)) + .try_collect()?; + let record_batch = + RecordBatch::try_new(Arc::new(output_schema.as_ref().try_into()?), arrays)?; + Ok(Box::new(ArrowEngineData::new(record_batch))) + } +} + +#[derive(Debug)] +pub struct DefaultExpressionEvaluator { + input_schema: SchemaRef, + expression: Box, + output_type: DataType, +} + +impl ExpressionEvaluator for DefaultExpressionEvaluator { + fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult> { + debug!( + "Arrow evaluator evaluating: {:#?}", + self.expression.as_ref() + ); + let batch = batch + .any_ref() + .downcast_ref::() + .ok_or_else(|| Error::engine_data_type("ArrowEngineData"))? 
+ .record_batch(); + let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; + // TODO: make sure we have matching schemas for validation + // if batch.schema().as_ref() != &input_schema { + // return Err(Error::Generic(format!( + // "input schema does not match batch schema: {:?} != {:?}", + // input_schema, + // batch.schema() + // ))); + // }; + let array_ref = evaluate_expression(&self.expression, batch, Some(&self.output_type))?; + let batch: RecordBatch = if let DataType::Struct(_) = self.output_type { + apply_schema(&array_ref, &self.output_type)? + } else { + let array_ref = apply_schema_to(&array_ref, &self.output_type)?; + let arrow_type: ArrowDataType = ArrowDataType::try_from(&self.output_type)?; + let schema = ArrowSchema::new(vec![ArrowField::new("output", arrow_type, true)]); + RecordBatch::try_new(Arc::new(schema), vec![array_ref])? + }; + Ok(Box::new(ArrowEngineData::new(batch))) + } +} diff --git a/kernel/src/engine/arrow_expression/tests.rs b/kernel/src/engine/arrow_expression/tests.rs new file mode 100644 index 000000000..586e5cdb7 --- /dev/null +++ b/kernel/src/engine/arrow_expression/tests.rs @@ -0,0 +1,502 @@ +use std::ops::{Add, Div, Mul, Sub}; + +use crate::arrow::array::{ + create_array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, ListArray, StructArray, +}; +use crate::arrow::buffer::{OffsetBuffer, ScalarBuffer}; +use crate::arrow::datatypes::{DataType, Field, Fields, Schema}; + +use super::*; +use crate::expressions::*; +use crate::schema::{ArrayType, StructField, StructType}; +use crate::DataType as DeltaDataTypes; +use crate::EvaluationHandlerExtension as _; + +#[test] +fn test_array_column() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + + let schema = Schema::new([arr_field.clone()]); + + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let not_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); + + let in_op = Expression::binary(BinaryOperator::In, 5, column_expr!("item")); + + let result = evaluate_expression(¬_op, &batch, None).unwrap(); + let expected = BooleanArray::from(vec![true, false, true]); + assert_eq!(result.as_ref(), &expected); + + let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![false, true, false]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_bad_right_type_array() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let schema = Schema::new([field.clone()]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values.clone())]).unwrap(); + + let in_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); + + let in_result = evaluate_expression(&in_op, &batch, None); + + assert!(in_result.is_err()); + assert_eq!( + in_result.unwrap_err().to_string(), + "Invalid expression evaluation: Cannot cast to list array: Int32" + ); +} + +#[test] +fn test_literal_type_array() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let schema = Schema::new([field.clone()]); + let batch = 
RecordBatch::new_empty(Arc::new(schema)); + + let in_op = Expression::binary( + BinaryOperator::NotIn, + 5, + Scalar::Array(ArrayData::new( + ArrayType::new(DeltaDataTypes::INTEGER, false), + vec![Scalar::Integer(1), Scalar::Integer(2)], + )), + ); + + let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![true]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_invalid_array_sides() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + + let schema = Schema::new([arr_field.clone()]); + + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let in_op = Expression::binary( + BinaryOperator::NotIn, + column_expr!("item"), + column_expr!("item"), + ); + + let in_result = evaluate_expression(&in_op, &batch, None); + + assert!(in_result.is_err()); + assert_eq!( + in_result.unwrap_err().to_string(), + "Invalid expression evaluation: Invalid right value for (NOT) IN comparison, left is: Column(item) right is: Column(item)".to_string() + ) +} + +#[test] +fn test_str_arrays() { + let values = GenericStringArray::::from(vec![ + "hi", "bye", "hi", "hi", "bye", "bye", "hi", "bye", "hi", + ]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Utf8, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + let schema = Schema::new([arr_field.clone()]); + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let str_not_op = Expression::binary(BinaryOperator::NotIn, "bye", column_expr!("item")); + + let str_in_op = Expression::binary(BinaryOperator::In, "hi", column_expr!("item")); + + let result = evaluate_expression(&str_in_op, &batch, None).unwrap(); + let expected = BooleanArray::from(vec![true, true, true]); + assert_eq!(result.as_ref(), &expected); + + let in_result = evaluate_expression(&str_not_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![false, false, false]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_extract_column() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = + RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); + let column = column_expr!("a"); + + let results = evaluate_expression(&column, &batch, None).unwrap(); + assert_eq!(results.as_ref(), &values); + + let schema = Schema::new(vec![Field::new( + "b", + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), + false, + )]); + + let struct_values: ArrayRef = Arc::new(values.clone()); + let struct_array = StructArray::from(vec![( + Arc::new(Field::new("a", DataType::Int32, false)), + struct_values, + )]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(struct_array.clone())], + ) + .unwrap(); + let column = column_expr!("b.a"); + let results = evaluate_expression(&column, &batch, None).unwrap(); + assert_eq!(results.as_ref(), &values); +} + 
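// Editorial sketch, not part of this patch: a test-style example of the `apply_schema_to`
// helper added in apply_schema.rs above, showing how it renames a struct field and adjusts
// nullability to match a target kernel schema. It assumes the imports of this test module;
// the field names "before" and "after" are illustrative only.
#[test]
fn apply_schema_to_rename_sketch() {
    // Arrow input: one int32 column under the name "before", not nullable.
    let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let input: ArrayRef = Arc::new(StructArray::from(vec![(
        Arc::new(Field::new("before", DataType::Int32, false)),
        values,
    )]));
    // Target kernel schema: same data type, new name "after", nullable. Data types must still
    // match, otherwise `apply_schema_to` returns an error.
    let target = DeltaDataTypes::struct_type([StructField::nullable(
        "after",
        DeltaDataTypes::INTEGER,
    )]);
    let result = apply_schema_to(&input, &target).unwrap();
    let renamed = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(renamed.column_names(), vec!["after"]);
    assert!(renamed.fields()[0].is_nullable());
}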
+#[test] +fn test_binary_op_scalar() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); + let column = column_expr!("a"); + + let expression = column.clone().add(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().sub(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().mul(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); + assert_eq!(results.as_ref(), expected.as_ref()); + + // TODO handle type casting + let expression = column.div(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); + assert_eq!(results.as_ref(), expected.as_ref()) +} + +#[test] +fn test_binary_op() { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + ]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(values.clone()), Arc::new(values)], + ) + .unwrap(); + let column_a = column_expr!("a"); + let column_b = column_expr!("b"); + + let expression = column_a.clone().add(column_b.clone()); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().sub(column_b.clone()); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![0, 0, 0])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().mul(column_b); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_binary_cmp() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); + let column = column_expr!("a"); + + let expression = column.clone().lt(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().lt_eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().gt(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, false, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().gt_eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, 
true, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().ne(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false, true])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_logical() { + let schema = Schema::new(vec![ + Field::new("a", DataType::Boolean, false), + Field::new("b", DataType::Boolean, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(BooleanArray::from(vec![true, false])), + Arc::new(BooleanArray::from(vec![false, true])), + ], + ) + .unwrap(); + let column_a = column_expr!("a"); + let column_b = column_expr!("b"); + + let expression = column_a.clone().and(column_b.clone()); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().and(true); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().or(column_b); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().or(false); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_null_row() { + // note that we _allow_ nested nulls, since the top-level struct can be NULL + let schema = Arc::new(StructType::new(vec![ + StructField::nullable( + "x", + StructType::new([ + StructField::nullable("a", crate::schema::DataType::INTEGER), + StructField::not_null("b", crate::schema::DataType::STRING), + ]), + ), + StructField::nullable("c", crate::schema::DataType::STRING), + ])); + let handler = ArrowEvaluationHandler; + let result = handler.null_row(schema.clone()).unwrap(); + let expected = RecordBatch::try_new( + Arc::new(schema.as_ref().try_into().unwrap()), + vec![ + Arc::new(StructArray::new_null( + [ + Arc::new(Field::new("a", DataType::Int32, true)), + Arc::new(Field::new("b", DataType::Utf8, false)), + ] + .into(), + 1, + )), + create_array!(Utf8, [None::]), + ], + ) + .unwrap(); + let result: RecordBatch = result + .into_any() + .downcast::() + .unwrap() + .into(); + assert_eq!(result, expected); +} + +#[test] +fn test_null_row_err() { + let not_null_schema = Arc::new(StructType::new(vec![StructField::not_null( + "a", + crate::schema::DataType::STRING, + )])); + let handler = ArrowEvaluationHandler; + assert!(handler.null_row(not_null_schema).is_err()); +} + +// helper to take values/schema to pass to `create_one` and assert the result = expected +fn assert_create_one(values: &[Scalar], schema: SchemaRef, expected: RecordBatch) { + let handler = ArrowEvaluationHandler; + let actual = 
handler.create_one(schema, values).unwrap(); + let actual_rb: RecordBatch = actual + .into_any() + .downcast::() + .unwrap() + .into(); + assert_eq!(actual_rb, expected); +} + +#[test] +fn test_create_one() { + let values: &[Scalar] = &[ + 1.into(), + "B".into(), + 3.into(), + Scalar::Null(DeltaDataTypes::INTEGER), + ]; + let schema = Arc::new(StructType::new([ + StructField::nullable("a", DeltaDataTypes::INTEGER), + StructField::nullable("b", DeltaDataTypes::STRING), + StructField::not_null("c", DeltaDataTypes::INTEGER), + StructField::nullable("d", DeltaDataTypes::INTEGER), + ])); + + let expected_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + Field::new("c", DataType::Int32, false), + Field::new("d", DataType::Int32, true), + ])); + let expected = RecordBatch::try_new( + expected_schema, + vec![ + create_array!(Int32, [1]), + create_array!(Utf8, ["B"]), + create_array!(Int32, [3]), + create_array!(Int32, [None]), + ], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_nested() { + let values: &[Scalar] = &[1.into(), 2.into()]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::nullable("b", DeltaDataTypes::INTEGER), + StructField::not_null("c", DeltaDataTypes::INTEGER), + ]), + )])); + let expected_schema = Arc::new(Schema::new(vec![Field::new( + "a", + DataType::Struct( + vec![ + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, false), + ] + .into(), + ), + false, + )])); + let expected = RecordBatch::try_new( + expected_schema, + vec![Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Int32, true)), + create_array!(Int32, [1]) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + create_array!(Int32, [2]) as ArrayRef, + ), + ]))], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_nested_null() { + let values: &[Scalar] = &[Scalar::Null(DeltaDataTypes::INTEGER), 1.into()]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::nullable("b", DeltaDataTypes::INTEGER), + StructField::not_null("c", DeltaDataTypes::INTEGER), + ]), + )])); + let expected_schema = Arc::new(Schema::new(vec![Field::new( + "a", + DataType::Struct( + vec![ + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, false), + ] + .into(), + ), + false, + )])); + let expected = RecordBatch::try_new( + expected_schema, + vec![Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Int32, true)), + create_array!(Int32, [None]) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + create_array!(Int32, [1]) as ArrayRef, + ), + ]))], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_not_null_struct() { + let values: &[Scalar] = &[ + Scalar::Null(DeltaDataTypes::INTEGER), + Scalar::Null(DeltaDataTypes::INTEGER), + ]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::not_null("b", DeltaDataTypes::INTEGER), + StructField::nullable("c", DeltaDataTypes::INTEGER), + ]), + )])); + let handler = ArrowEvaluationHandler; + assert!(handler.create_one(schema, values).is_err()); +} + +#[test] +fn test_create_one_top_level_null() { + let values = &[Scalar::Null(DeltaDataTypes::INTEGER)]; + let 
handler = ArrowEvaluationHandler; + + let schema = Arc::new(StructType::new([StructField::not_null( + "col_1", + DeltaDataTypes::INTEGER, + )])); + assert!(matches!( + handler.create_one(schema, values), + Err(Error::InvalidStructData(_)) + )); +} diff --git a/kernel/src/engine/arrow_get_data.rs b/kernel/src/engine/arrow_get_data.rs index 145aab66b..fbed64df1 100644 --- a/kernel/src/engine/arrow_get_data.rs +++ b/kernel/src/engine/arrow_get_data.rs @@ -1,4 +1,4 @@ -use arrow_array::{ +use crate::arrow::array::{ types::{GenericStringType, Int32Type, Int64Type}, Array, BooleanArray, GenericByteArray, GenericListArray, MapArray, OffsetSizeTrait, PrimitiveArray, diff --git a/kernel/src/engine/arrow_utils.rs b/kernel/src/engine/arrow_utils.rs index 06441b9d4..749f1399c 100644 --- a/kernel/src/engine/arrow_utils.rs +++ b/kernel/src/engine/arrow_utils.rs @@ -12,18 +12,19 @@ use crate::{ DeltaResult, EngineData, Error, }; -use arrow_array::{ - cast::AsArray, new_null_array, Array as ArrowArray, GenericListArray, OffsetSizeTrait, - RecordBatch, StringArray, StructArray, +use crate::arrow::array::{ + cast::AsArray, make_array, new_null_array, Array as ArrowArray, GenericListArray, + OffsetSizeTrait, RecordBatch, StringArray, StructArray, }; -use arrow_json::{LineDelimitedWriter, ReaderBuilder}; -use arrow_schema::{ +use crate::arrow::buffer::NullBuffer; +use crate::arrow::compute::concat_batches; +use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, Fields, SchemaRef as ArrowSchemaRef, }; -use arrow_select::concat::concat_batches; +use crate::arrow::json::{LineDelimitedWriter, ReaderBuilder}; +use crate::parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use itertools::Itertools; -use parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use tracing::debug; macro_rules! prim_array_cmp { @@ -40,7 +41,7 @@ macro_rules! prim_array_cmp { .ok_or(Error::invalid_expression( format!("Cannot cast to list array: {}", $right_arr.data_type())) )?; - arrow_ord::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) + crate::arrow::compute::kernels::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) } )+ _ => Err(ArrowError::CastError( @@ -59,7 +60,25 @@ pub(crate) use prim_array_cmp; /// returns a tuples of (mask_indices: Vec, reorder_indices: /// Vec). `mask_indices` is used for generating the mask for reading from the pub(crate) fn make_arrow_error(s: impl Into) -> Error { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s.into())).with_backtrace() + Error::Arrow(crate::arrow::error::ArrowError::InvalidArgumentError( + s.into(), + )) + .with_backtrace() +} + +/// Applies post-processing to data read from parquet files. This includes `reorder_struct_array` to +/// ensure schema compatibility, as well as `fix_nested_null_masks` to ensure that leaf columns have +/// accurate null masks that row visitors rely on for correctness. 
+pub(crate) fn fixup_parquet_read( + batch: RecordBatch, + requested_ordering: &[ReorderIndex], +) -> DeltaResult +where + StructArray: Into, +{ + let data = reorder_struct_array(batch.into(), requested_ordering)?; + let data = fix_nested_null_masks(data); + Ok(data.into()) } /* @@ -500,7 +519,7 @@ pub(crate) fn reorder_struct_array( match &reorder_index.transform { ReorderIndexTransform::Cast(target) => { let col = input_cols[parquet_position].as_ref(); - let col = Arc::new(arrow_cast::cast::cast(col, target)?); + let col = Arc::new(crate::arrow::compute::cast(col, target)?); let new_field = Arc::new( input_fields[parquet_position] .as_ref() @@ -609,6 +628,53 @@ fn reorder_list( } } +/// Use this function to recursively compute properly unioned null masks for all nested +/// columns of a record batch, making it safe to project out and consume nested columns. +/// +/// Arrow does not guarantee that the null masks associated with nested columns are accurate -- +/// instead, the reader must consult the union of logical null masks the column and all +/// ancestors. The parquet reader stopped doing this automatically as of arrow-53.3, for example. +pub fn fix_nested_null_masks(batch: StructArray) -> StructArray { + compute_nested_null_masks(batch, None) +} + +/// Splits a StructArray into its parts, unions in the parent null mask, and uses the result to +/// recursively update the children as well before putting everything back together. +fn compute_nested_null_masks(sa: StructArray, parent_nulls: Option<&NullBuffer>) -> StructArray { + let (fields, columns, nulls) = sa.into_parts(); + let nulls = NullBuffer::union(parent_nulls, nulls.as_ref()); + let columns = columns + .into_iter() + .map(|column| match column.as_struct_opt() { + Some(sa) => Arc::new(compute_nested_null_masks(sa.clone(), nulls.as_ref())) as _, + None => { + let data = column.to_data(); + let nulls = NullBuffer::union(nulls.as_ref(), data.nulls()); + let builder = data.into_builder().nulls(nulls); + // Use an unchecked build to avoid paying a redundant O(k) validation cost for a + // `RecordBatch` with k leaf columns. + // + // SAFETY: The builder was constructed from an `ArrayData` we extracted from the + // column. The change we make is the null buffer, via `NullBuffer::union` with input + // null buffers that were _also_ extracted from the column and its parent. A union + // can only _grow_ the set of NULL rows, so data validity is preserved. Even if the + // `parent_nulls` somehow had a length mismatch --- which it never should, having + // also been extracted from our grandparent --- the mismatch would have already + // caused `NullBuffer::union` to panic. + let data = unsafe { builder.build_unchecked() }; + make_array(data) + } + }) + .collect(); + + // Use an unchecked constructor to avoid paying O(n*k) a redundant null buffer validation cost + // for a `RecordBatch` with n rows and k leaf columns. + // + // SAFETY: We are simply reassembling the input `StructArray` we previously broke apart, with + // updated null buffers. See above for details about null buffer safety. + unsafe { StructArray::new_unchecked(fields, columns, nulls) } +} + /// Arrow lacks the functionality to json-parse a string column into a struct column -- even tho the /// JSON file reader does exactly the same thing. This function is a hack to work around that gap. 
pub(crate) fn parse_json( @@ -679,17 +745,17 @@ pub(crate) fn to_json_bytes( mod tests { use std::sync::Arc; - use arrow::{ - array::AsArray, - buffer::{OffsetBuffer, ScalarBuffer}, - }; - use arrow_array::{ + use crate::arrow::array::{ Array, ArrayRef as ArrowArrayRef, BooleanArray, GenericListArray, Int32Array, StructArray, }; - use arrow_schema::{ + use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; + use crate::arrow::{ + array::AsArray, + buffer::{OffsetBuffer, ScalarBuffer}, + }; use crate::schema::{ArrayType, DataType, MapType, StructField, StructType}; @@ -1432,4 +1498,107 @@ mod tests { ); Ok(()) } + + #[test] + fn test_arrow_broken_nested_null_masks() { + use crate::arrow::datatypes::{DataType, Field, Fields, Schema}; + use crate::engine::arrow_utils::fix_nested_null_masks; + use crate::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + + // Parse some JSON into a nested schema + let schema = Arc::new(Schema::new(vec![Field::new( + "outer", + DataType::Struct(Fields::from(vec![ + Field::new( + "inner_nullable", + DataType::Struct(Fields::from(vec![ + Field::new("leaf_non_null", DataType::Int32, false), + Field::new("leaf_nullable", DataType::Int32, true), + ])), + true, + ), + Field::new( + "inner_non_null", + DataType::Struct(Fields::from(vec![ + Field::new("leaf_non_null", DataType::Int32, false), + Field::new("leaf_nullable", DataType::Int32, true), + ])), + false, + ), + ])), + true, + )])); + let json_string = r#" +{ } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 1 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 2, "leaf_nullable" : 3 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 4 }, "inner_nullable" : { "leaf_non_null" : 5 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 6 }, "inner_nullable" : { "leaf_non_null" : 7, "leaf_nullable": 8 } } } +"#; + let batch1 = crate::arrow::json::ReaderBuilder::new(schema.clone()) + .build(json_string.as_bytes()) + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("Batch 1: {batch1:?}"); + + macro_rules! assert_nulls { + ( $column: expr, $nulls: expr ) => { + assert_eq!($column.nulls().unwrap(), &NullBuffer::from(&$nulls[..])); + }; + } + + // If any of these tests ever fail, it means the arrow JSON reader started producing + // incomplete nested NULL masks. If that happens, we need to update all JSON reads to call + // `fix_nested_null_masks`. 
+ let outer_1 = batch1.column(0).as_struct(); + assert_nulls!(outer_1, [false, true, true, true, true]); + let inner_nullable_1 = outer_1.column(0).as_struct(); + assert_nulls!(inner_nullable_1, [false, false, false, true, true]); + let nullable_leaf_non_null_1 = inner_nullable_1.column(0); + assert_nulls!(nullable_leaf_non_null_1, [false, false, false, true, true]); + let nullable_leaf_nullable_1 = inner_nullable_1.column(1); + assert_nulls!(nullable_leaf_nullable_1, [false, false, false, false, true]); + let inner_non_null_1 = outer_1.column(1).as_struct(); + assert_nulls!(inner_non_null_1, [false, true, true, true, true]); + let non_null_leaf_non_null_1 = inner_non_null_1.column(0); + assert_nulls!(non_null_leaf_non_null_1, [false, true, true, true, true]); + let non_null_leaf_nullable_1 = inner_non_null_1.column(1); + assert_nulls!(non_null_leaf_nullable_1, [false, false, true, false, false]); + + // Write the batch to a parquet file and read it back + let mut buffer = vec![]; + let mut writer = + crate::parquet::arrow::ArrowWriter::try_new(&mut buffer, schema.clone(), None).unwrap(); + writer.write(&batch1).unwrap(); + writer.close().unwrap(); // writer must be closed to write footer + let batch2 = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer)) + .unwrap() + .build() + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("Batch 2 before: {batch2:?}"); + + // Starting from arrow-53.3, the parquet reader started returning broken nested NULL masks. + let batch2 = RecordBatch::from(fix_nested_null_masks(batch2.into())); + + // Verify the data survived the round trip + let outer_2 = batch2.column(0).as_struct(); + assert_eq!(outer_2, outer_1); + let inner_nullable_2 = outer_2.column(0).as_struct(); + assert_eq!(inner_nullable_2, inner_nullable_1); + let nullable_leaf_non_null_2 = inner_nullable_2.column(0); + assert_eq!(nullable_leaf_non_null_2, nullable_leaf_non_null_1); + let nullable_leaf_nullable_2 = inner_nullable_2.column(1); + assert_eq!(nullable_leaf_nullable_2, nullable_leaf_nullable_1); + let inner_non_null_2 = outer_2.column(1).as_struct(); + assert_eq!(inner_non_null_2, inner_non_null_1); + let non_null_leaf_non_null_2 = inner_non_null_2.column(0); + assert_eq!(non_null_leaf_non_null_2, non_null_leaf_non_null_1); + let non_null_leaf_nullable_2 = inner_non_null_2.column(1); + assert_eq!(non_null_leaf_nullable_2, non_null_leaf_nullable_1); + } } diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index 075716a75..bcdc370a0 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -5,8 +5,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{ready, Context, Poll}; -use arrow_array::RecordBatch; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::array::RecordBatch; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; use futures::future::BoxFuture; use futures::stream::{BoxStream, Stream, StreamExt}; use futures::FutureExt; diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 5606a28d0..fdbc79ebf 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -7,29 +7,27 @@ use object_store::path::Path; use object_store::{DynObjectStore, ObjectStore}; use url::Url; +use super::UrlExt; use crate::engine::default::executor::TaskExecutor; -use crate::{DeltaResult, Error, FileMeta, FileSlice, FileSystemClient}; +use crate::{DeltaResult, Error, FileMeta, 
FileSlice, StorageHandler}; #[derive(Debug)] -pub struct ObjectStoreFileSystemClient { +pub struct ObjectStoreStorageHandler { inner: Arc, has_ordered_listing: bool, - table_root: Path, task_executor: Arc, readahead: usize, } -impl ObjectStoreFileSystemClient { +impl ObjectStoreStorageHandler { pub(crate) fn new( store: Arc, has_ordered_listing: bool, - table_root: Path, task_executor: Arc, ) -> Self { Self { inner: store, has_ordered_listing, - table_root, task_executor, readahead: 10, } @@ -42,21 +40,33 @@ impl ObjectStoreFileSystemClient { } } -impl FileSystemClient for ObjectStoreFileSystemClient { +impl StorageHandler for ObjectStoreStorageHandler { fn list_from( &self, path: &Url, ) -> DeltaResult>>> { - let url = path.clone(); - let offset = Path::from(path.path()); - // TODO properly handle table prefix - let prefix = self.table_root.child("_delta_log"); + // The offset is used for list-after; the prefix is used to restrict the listing to a specific directory. + // Unfortunately, `Path` provides no easy way to check whether a name is directory-like, + // because it strips trailing /, so we're reduced to manually checking the original URL. + let offset = Path::from_url_path(path.path())?; + let prefix = if path.path().ends_with('/') { + offset.clone() + } else { + let mut parts = offset.parts().collect_vec(); + if parts.pop().is_none() { + return Err(Error::Generic(format!( + "Offset path must not be a root directory. Got: '{}'", + path.as_str() + ))); + } + Path::from_iter(parts) + }; let store = self.inner.clone(); // This channel will become the iterator let (sender, receiver) = std::sync::mpsc::sync_channel(4_000); - + let url = path.clone(); self.task_executor.spawn(async move { let mut stream = store.list_with_offset(Some(&prefix), &offset); @@ -122,19 +132,14 @@ impl FileSystemClient for ObjectStoreFileSystemClient { }; let store = store.clone(); async move { - match url.scheme() { - "http" | "https" => { - // have to annotate type here or rustc can't figure it out - Ok::(reqwest::get(url).await?.bytes().await?) - } - _ => { - if let Some(rng) = range { - Ok(store.get_range(&path, rng).await?) - } else { - let result = store.get(&path).await?; - Ok(result.bytes().await?) - } - } + if url.is_presigned() { + // have to annotate type here or rustc can't figure it out + Ok::(reqwest::get(url).await?.bytes().await?) + } else if let Some(rng) = range { + Ok(store.get_range(&path, rng).await?) + } else { + let result = store.get(&path).await?; + Ok(result.bytes().await?) 
} } }) @@ -192,11 +197,9 @@ mod tests { let mut url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); @@ -210,7 +213,7 @@ mod tests { url.set_path(&format!("{}/c", url.path())); slices.push((url, Some(Range { start: 4, end: 9 }))); dbg!("Slices are: {}", &slices); - let data: Vec = client.read_files(slices).unwrap().try_collect().unwrap(); + let data: Vec = storage.read_files(slices).unwrap().try_collect().unwrap(); assert_eq!(data.len(), 3); assert_eq!(data[0], Bytes::from("kernel")); @@ -229,11 +232,10 @@ mod tests { store.put(&name, data.clone().into()).await.unwrap(); let table_root = Url::parse("memory:///").expect("valid url"); - let prefix = Path::from_url_path(table_root.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); let files: Vec<_> = engine - .get_file_system_client() - .list_from(&table_root) + .storage_handler() + .list_from(&table_root.join("_delta_log").unwrap().join("0").unwrap()) .unwrap() .try_collect() .unwrap(); @@ -260,11 +262,11 @@ mod tests { let url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from_url_path(url.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); - let client = engine.get_file_system_client(); - - let files = client.list_from(&Url::parse("file://").unwrap()).unwrap(); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); + let files = engine + .storage_handler() + .list_from(&url.join("_delta_log").unwrap().join("0").unwrap()) + .unwrap(); let mut len = 0; for (file, expected) in files.zip(expected_names.iter()) { assert!( diff --git a/kernel/src/engine/default/json.rs b/kernel/src/engine/default/json.rs index ab296e12a..1dc35539e 100644 --- a/kernel/src/engine/default/json.rs +++ b/kernel/src/engine/default/json.rs @@ -2,19 +2,22 @@ use std::io::BufReader; use std::ops::Range; -use std::sync::Arc; -use std::task::{ready, Poll}; +use std::sync::{mpsc, Arc}; +use std::task::Poll; -use arrow_json::ReaderBuilder; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; +use crate::arrow::record_batch::RecordBatch; use bytes::{Buf, Bytes}; -use futures::{StreamExt, TryStreamExt}; +use futures::stream::{self, BoxStream}; +use futures::{ready, StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{DynObjectStore, GetResultPayload}; +use tracing::warn; use url::Url; use super::executor::TaskExecutor; -use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::arrow_utils::parse_json as arrow_parse_json; use crate::engine::arrow_utils::to_json_bytes; use crate::schema::SchemaRef; @@ -23,15 +26,21 @@ use crate::{ JsonHandler, }; +const DEFAULT_BUFFER_SIZE: usize = 1000; +const DEFAULT_BATCH_SIZE: usize = 1000; + #[derive(Debug)] pub struct DefaultJsonHandler { /// The object store to read files from store: Arc, /// The executor to run async tasks on task_executor: 
Arc, - /// The maximum number of batches to read ahead - readahead: usize, - /// The number of rows to read per batch + /// The maximum number of read requests to buffer in memory at once. Note that this actually + /// controls two things: the number of concurrent requests (done by `buffered`) and the size of + /// the buffer (via our `sync_channel`). + buffer_size: usize, + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. batch_size: usize, } @@ -40,22 +49,34 @@ impl DefaultJsonHandler { Self { store, task_executor, - readahead: 10, - batch_size: 1024, + buffer_size: DEFAULT_BUFFER_SIZE, + batch_size: DEFAULT_BATCH_SIZE, } } - /// Set the maximum number of batches to read ahead during [Self::read_json_files()]. + /// Set the maximum number read requests to buffer in memory at once in + /// [Self::read_json_files()]. + /// + /// Defaults to 1000. /// - /// Defaults to 10. - pub fn with_readahead(mut self, readahead: usize) -> Self { - self.readahead = readahead; + /// Memory constraints can be imposed by constraining the buffer size and batch size. Note that + /// overall memory usage is proportional to the product of these two values. + /// 1. Batch size governs the size of RecordBatches yielded in each iteration of the stream + /// 2. Buffer size governs the number of concurrent tasks (which equals the size of the buffer + pub fn with_buffer_size(mut self, buffer_size: usize) -> Self { + self.buffer_size = buffer_size; self } - /// Set the number of rows to read per batch during [Self::parse_json()]. + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. + /// + /// Defaults to 1000 rows (json objects). + /// + /// See [Decoder::with_buffer_size] for details on constraining memory usage with buffer size + /// and batch size. /// - /// Defaults to 1024. 
+ /// [Decoder::with_buffer_size]: crate::arrow::json::reader::Decoder pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; self @@ -83,13 +104,32 @@ impl JsonHandler for DefaultJsonHandler { let schema: ArrowSchemaRef = Arc::new(physical_schema.as_ref().try_into()?); let file_opener = JsonOpener::new(self.batch_size, schema.clone(), self.store.clone()); - FileStream::new_async_read_iterator( - self.task_executor.clone(), - schema, - Box::new(file_opener), - files, - self.readahead, - ) + + let (tx, rx) = mpsc::sync_channel(self.buffer_size); + let files = files.to_vec(); + let buffer_size = self.buffer_size; + + self.task_executor.spawn(async move { + // an iterator of futures that open each file + let file_futures = files.into_iter().map(|file| file_opener.open(file, None)); + + // create a stream from that iterator which buffers up to `buffer_size` futures at a time + let mut stream = stream::iter(file_futures) + .buffered(buffer_size) + .try_flatten() + .map_ok(|record_batch| -> Box { + Box::new(ArrowEngineData::new(record_batch)) + }); + + // send each record batch over the channel + while let Some(item) = stream.next().await { + if tx.send(item).is_err() { + warn!("read_json receiver end of channel dropped before sending completed"); + } + } + }); + + Ok(Box::new(rx.into_iter())) } // note: for now we just buffer all the data and write it out all at once @@ -102,7 +142,7 @@ impl JsonHandler for DefaultJsonHandler { let buffer = to_json_bytes(data)?; // Put if absent let store = self.store.clone(); // cheap Arc - let path = Path::from(path.path()); + let path = Path::from_url_path(path.path())?; let path_str = path.to_string(); self.task_executor .block_on(async move { @@ -118,7 +158,7 @@ impl JsonHandler for DefaultJsonHandler { } } -/// A [`FileOpener`] that opens a JSON file and yields a [`FileOpenFuture`] +/// Opens JSON files and returns a stream of record batches #[allow(missing_debug_implementations)] pub struct JsonOpener { batch_size: usize, @@ -127,97 +167,309 @@ pub struct JsonOpener { } impl JsonOpener { - /// Returns a [`JsonOpener`] + /// Returns a [`JsonOpener`] pub fn new( batch_size: usize, projected_schema: ArrowSchemaRef, - // file_compression_type: FileCompressionType, object_store: Arc, ) -> Self { Self { batch_size, projected_schema, - // file_compression_type, object_store, } } } -impl FileOpener for JsonOpener { - fn open(&self, file_meta: FileMeta, _: Option>) -> DeltaResult { +impl JsonOpener { + pub async fn open( + &self, + file_meta: FileMeta, + _: Option>, + ) -> DeltaResult>> { let store = self.object_store.clone(); let schema = self.projected_schema.clone(); let batch_size = self.batch_size; - Ok(Box::pin(async move { - let path = Path::from_url_path(file_meta.location.path())?; - match store.get(&path).await?.payload { - GetResultPayload::File(file, _) => { - let reader = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build(BufReader::new(file))?; - Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) - } - GetResultPayload::Stream(s) => { - let mut decoder = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build_decoder()?; - - let mut input = s.map_err(Error::from); - let mut buffered = Bytes::new(); - - let s = futures::stream::poll_fn(move |cx| { - loop { - if buffered.is_empty() { - buffered = match ready!(input.poll_next_unpin(cx)) { - Some(Ok(b)) => b, - Some(Err(e)) => return Poll::Ready(Some(Err(e))), - None => break, - }; - } - let read = buffered.len(); - - let 
decoded = match decoder.decode(buffered.as_ref()) { - Ok(decoded) => decoded, - Err(e) => return Poll::Ready(Some(Err(e.into()))), + let path = Path::from_url_path(file_meta.location.path())?; + match store.get(&path).await?.payload { + GetResultPayload::File(file, _) => { + let reader = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build(BufReader::new(file))?; + Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) + } + GetResultPayload::Stream(s) => { + let mut decoder = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build_decoder()?; + + let mut input = s.map_err(Error::from); + let mut buffered = Bytes::new(); + + let s = futures::stream::poll_fn(move |cx| { + loop { + if buffered.is_empty() { + buffered = match ready!(input.poll_next_unpin(cx)) { + Some(Ok(b)) => b, + Some(Err(e)) => return Poll::Ready(Some(Err(e))), + None => break, }; - - buffered.advance(decoded); - if decoded != read { - break; - } } + let read = buffered.len(); + + // NB (from Decoder::decode docs): + // Read JSON objects from `buf` (param), returning the number of bytes read + // + // This method returns once `batch_size` objects have been parsed since the + // last call to [`Self::flush`], or `buf` is exhausted. Any remaining bytes + // should be included in the next call to [`Self::decode`] + let decoded = match decoder.decode(buffered.as_ref()) { + Ok(decoded) => decoded, + Err(e) => return Poll::Ready(Some(Err(e.into()))), + }; + + buffered.advance(decoded); + if decoded != read { + break; + } + } - Poll::Ready(decoder.flush().map_err(Error::from).transpose()) - }); - Ok(s.map_err(Error::from).boxed()) - } + Poll::Ready(decoder.flush().map_err(Error::from).transpose()) + }); + Ok(s.map_err(Error::from).boxed()) } - })) + } } } #[cfg(test)] mod tests { + use std::collections::{HashMap, HashSet, VecDeque}; use std::path::PathBuf; - - use arrow::array::{AsArray, RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use std::sync::{mpsc, Arc, Mutex}; + use std::task::Waker; + + use crate::actions::get_log_schema; + use crate::arrow::array::{AsArray, Int32Array, RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use crate::engine::arrow_data::ArrowEngineData; + use crate::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, + }; + use crate::utils::test_utils::string_array_to_engine_data; + use futures::future; use itertools::Itertools; - use object_store::{local::LocalFileSystem, ObjectStore}; + use object_store::local::LocalFileSystem; + use object_store::memory::InMemory; + use object_store::{ + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, + }; + + // TODO: should just use the one from test_utils, but running into dependency issues + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } use super::*; - use crate::{ - actions::get_log_schema, engine::arrow_data::ArrowEngineData, - engine::default::executor::tokio::TokioBackgroundExecutor, - }; - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - 
Box::new(ArrowEngineData::new(batch)) + /// Store wrapper that wraps an inner store to guarantee the ordering of GET requests. Note + /// that since the keys are resolved in order, requests to subsequent keys in the order will + /// block until the earlier keys are requested. + /// + /// WARN: Does not handle duplicate keys, and will fail on duplicate requests of the same key. + /// + // TODO(zach): we can handle duplicate requests if we retain the ordering of the keys track + // that all of the keys prior to the one requested have been resolved. + #[derive(Debug)] + struct OrderedGetStore { + // The ObjectStore we are wrapping + inner: T, + // Combined state: queue and wakers, protected by a single mutex + state: Mutex, + } + + #[derive(Debug)] + struct KeysAndWakers { + // Queue of paths in order which they will resolve + ordered_keys: VecDeque, + // Map of paths to wakers for pending get requests + wakers: HashMap, + } + + impl OrderedGetStore { + fn new(inner: T, ordered_keys: &[Path]) -> Self { + let ordered_keys = ordered_keys.to_vec(); + // Check for duplicates + let mut seen = HashSet::new(); + for key in ordered_keys.iter() { + if !seen.insert(key) { + panic!("Duplicate key in OrderedGetStore: {}", key); + } + } + + let state = KeysAndWakers { + ordered_keys: ordered_keys.into(), + wakers: HashMap::new(), + }; + + Self { + inner, + state: Mutex::new(state), + } + } + } + + impl std::fmt::Display for OrderedGetStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let state = self.state.lock().unwrap(); + write!(f, "OrderedGetStore({:?})", state.ordered_keys) + } + } + + #[async_trait::async_trait] + impl ObjectStore for OrderedGetStore { + async fn put(&self, location: &Path, payload: PutPayload) -> Result { + self.inner.put(location, payload).await + } + + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + self.inner.put_opts(location, payload, opts).await + } + + async fn put_multipart(&self, location: &Path) -> Result> { + self.inner.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOpts, + ) -> Result> { + self.inner.put_multipart_opts(location, opts).await + } + + // A GET request is fulfilled by checking if the requested path is next in order: + // - if yes, remove the path from the queue and proceed with the GET request, then wake the + // next path in order + // - if no, register the waker and wait + async fn get(&self, location: &Path) -> Result { + // Do the actual GET request first, then introduce any artificial ordering delays as needed + let result = self.inner.get(location).await; + + // we implement a future which only resolves once the requested path is next in order + future::poll_fn(move |cx| { + let mut state = self.state.lock().unwrap(); + let Some(next_key) = state.ordered_keys.front() else { + panic!("Ran out of keys before {location}"); + }; + if next_key == location { + // We are next in line. Nobody else can remove our key, and our successor + // cannot race with us to register itself because we hold the lock. + // + // first, remove our key from the queue. + // + // note: safe to unwrap because we just checked that the front key exists (and + // is the same as our requested location) + state.ordered_keys.pop_front().unwrap(); + + // there are three possible cases, either: + // 1. the key has already been requested, hence there is a waker waiting, and we + // need to wake it up + // 2. 
the next key has no waker registered, in which case we do nothing, and + // whenever the request for said key is made, it will either be next in line + // or a waker will be registered - either case ensuring that the request is + // completed + // 3. the next key is the last key in the queue, in which case there is nothing + // left to do (no need to wake anyone) + if let Some(next_key) = state.ordered_keys.front().cloned() { + if let Some(waker) = state.wakers.remove(&next_key) { + waker.wake(); // NOTE: Not async, returns instantly. + } + } + Poll::Ready(()) + } else { + // We are not next in line, so wait on our key. Nobody can race to remove it + // because we own it; nobody can race to wake us because we hold the lock. + if state + .wakers + .insert(location.clone(), cx.waker().clone()) + .is_some() + { + panic!("Somebody else is already waiting on {location}"); + } + Poll::Pending + } + }) + .await; + + // When we return this result, the future succeeds instantly. Any pending wake() call + // will not be processed before the next time we yield -- unless our executor is + // multi-threaded and happens to have another thread available. In that case, the + // serialization point is the moment our next-key poll_fn issues the wake call (or + // proves no wake is needed). + result + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.inner.get_opts(location, options).await + } + + async fn get_range(&self, location: &Path, range: Range) -> Result { + self.inner.get_range(location, range).await + } + + async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { + self.inner.get_ranges(location, ranges).await + } + + async fn head(&self, location: &Path) -> Result { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, Result> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy(from, to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename_if_not_exists(from, to).await + } } #[test] @@ -273,7 +525,7 @@ mod tests { "./tests/data/table-with-dv-small/_delta_log/00000000000000000000.json", )) .unwrap(); - let url = url::Url::from_file_path(path).unwrap(); + let url = Url::from_file_path(path).unwrap(); let location = Path::from(url.path()); let meta = store.head(&location).await.unwrap(); @@ -286,21 +538,188 @@ mod tests { let handler = DefaultJsonHandler::new(store, Arc::new(TokioBackgroundExecutor::new())); let physical_schema = Arc::new(ArrowSchema::try_from(get_log_schema().as_ref()).unwrap()); let data: Vec = handler - .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .read_json_files(files, get_log_schema().clone(), None) .unwrap() - .map(|ed_res| { - // TODO(nick) make this easier - ed_res.and_then(|ed| { - ed.into_any() - .downcast::() - 
.map_err(|_| Error::engine_data_type("ArrowEngineData")) - .map(|sd| sd.into()) - }) - }) + .map_ok(into_record_batch) .try_collect() .unwrap(); assert_eq!(data.len(), 1); assert_eq!(data[0].num_rows(), 4); + + // limit batch size + let handler = handler.with_batch_size(2); + let data: Vec = handler + .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + assert_eq!(data.len(), 2); + assert_eq!(data[0].num_rows(), 2); + assert_eq!(data[1].num_rows(), 2); + } + + #[tokio::test] + async fn test_ordered_get_store() { + // note we don't want to go over 1000 since we only buffer 1000 requests at a time + let num_paths = 1000; + let ordered_paths: Vec = (0..num_paths) + .map(|i| Path::from(format!("/test/path{}", i))) + .collect(); + let jumbled_paths: Vec<_> = ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(); + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("content_{}", i)).into()) + .await + .unwrap(); + } + + // Create ordered store with natural order (0, 1, 2, ...) + let ordered_store = Arc::new(OrderedGetStore::new(memory_store, &ordered_paths)); + + let (tx, rx) = mpsc::channel(); + + // Spawn tasks to GET each path in our somewhat jumbled order + // They should complete in order (0, 1, 2, ...) due to OrderedGetStore + let handles = jumbled_paths.into_iter().map(|path| { + let store = ordered_store.clone(); + let tx = tx.clone(); + tokio::spawn(async move { + let _ = store.get(&path).await.unwrap(); + tx.send(path).unwrap(); + }) + }); + + // TODO(zach): we need to join all the handles otherwise none of the tasks run? despite the + // docs? + future::join_all(handles).await; + drop(tx); + + // NB (from mpsc::Receiver::recv): This function will always block the current thread if + // there is no data available and it's possible for more data to be sent (at least one + // sender still exists). + let mut completed = Vec::new(); + while let Ok(path) = rx.recv() { + completed.push(path); + } + + assert_eq!( + completed, + ordered_paths.into_iter().collect_vec(), + "Expected paths to complete in order" + ); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 3)] + async fn test_read_json_files_ordering() { + // this test checks that the read_json_files method returns the files in order in the + // presence of an ObjectStore (OrderedGetStore) that resolves paths in a jumbled order: + // 1. we set up a list of FileMetas (and some random JSON content) in order + // 2. we then set up an ObjectStore to resolves those paths in a jumbled order + // 3. 
then call read_json_files and check that the results are in order + let ordered_paths: Vec = (0..1000) + .map(|i| Path::from(format!("test/path{}", i))) + .collect(); + + let test_list: &[(usize, Vec)] = &[ + // test 1: buffer_size = 1000, just 1000 jumbled paths + ( + 1000, // buffer_size + ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(), + ), + // test 2: buffer_size = 4, jumbled paths in groups of 4 + ( + 4, // buffer_size + (0..250) + .flat_map(|i| { + [ + ordered_paths[1 + 4 * i].clone(), + ordered_paths[4 * i].clone(), + ordered_paths[3 + 4 * i].clone(), + ordered_paths[2 + 4 * i].clone(), + ] + }) + .collect_vec(), + ), + ]; + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("{{\"val\": {i}}}")).into()) + .await + .unwrap(); + } + + for (buffer_size, jumbled_paths) in test_list { + // set up our ObjectStore to resolve paths in a jumbled order + let store = Arc::new(OrderedGetStore::new(memory_store.fork(), jumbled_paths)); + + // convert the paths to FileMeta + let ordered_file_meta: Vec<_> = ordered_paths + .iter() + .map(|path| { + let store = store.clone(); + async move { + let url = Url::parse(&format!("memory:/{}", path)).unwrap(); + let location = Path::from(path.as_ref()); + let meta = store.head(&location).await.unwrap(); + FileMeta { + location: url, + last_modified: meta.last_modified.timestamp_millis(), + size: meta.size, + } + } + }) + .collect(); + + // note: join_all is ordered + let files = future::join_all(ordered_file_meta).await; + + // fire off the read_json_files call (for all the files in order) + let handler = DefaultJsonHandler::new( + store, + Arc::new(TokioMultiThreadExecutor::new( + tokio::runtime::Handle::current(), + )), + ); + let handler = handler.with_buffer_size(*buffer_size); + let schema = Arc::new(ArrowSchema::new(vec![Arc::new(Field::new( + "val", + DataType::Int32, + true, + ))])); + let physical_schema = Arc::new(schema.try_into().unwrap()); + let data: Vec = handler + .read_json_files(&files, physical_schema, None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + // check the order + let all_values: Vec = data + .iter() + .flat_map(|batch| { + let val_col: &Int32Array = batch.column(0).as_primitive(); + (0..val_col.len()).map(|i| val_col.value(i)).collect_vec() + }) + .collect(); + assert_eq!(all_values, (0..1000).collect_vec()); + } } } diff --git a/kernel/src/engine/default/mod.rs b/kernel/src/engine/default/mod.rs index d89cf29cd..49a008136 100644 --- a/kernel/src/engine/default/mod.rs +++ b/kernel/src/engine/default/mod.rs @@ -10,20 +10,19 @@ use std::collections::HashMap; use std::sync::Arc; use self::storage::parse_url_opts; -use object_store::{path::Path, DynObjectStore}; +use object_store::DynObjectStore; use url::Url; use self::executor::TaskExecutor; -use self::filesystem::ObjectStoreFileSystemClient; +use self::filesystem::ObjectStoreStorageHandler; use self::json::DefaultJsonHandler; use self::parquet::DefaultParquetHandler; use super::arrow_data::ArrowEngineData; -use super::arrow_expression::ArrowExpressionHandler; +use super::arrow_expression::ArrowEvaluationHandler; use crate::schema::Schema; use crate::transaction::WriteContext; use crate::{ - DeltaResult, Engine, EngineData, ExpressionHandler, FileSystemClient, JsonHandler, - ParquetHandler, + DeltaResult, Engine, EngineData, EvaluationHandler, JsonHandler, 
ParquetHandler, StorageHandler, }; pub mod executor; @@ -35,11 +34,11 @@ pub mod storage; #[derive(Debug)] pub struct DefaultEngine { - store: Arc, - file_system: Arc>, + object_store: Arc, + storage: Arc>, json: Arc>, parquet: Arc>, - expression: Arc, + expression: Arc, } impl DefaultEngine { @@ -60,18 +59,17 @@ impl DefaultEngine { V: Into, { // table root is the path of the table in the ObjectStore - let (store, table_root) = parse_url_opts(table_root, options)?; - Ok(Self::new(Arc::new(store), table_root, task_executor)) + let (object_store, _table_root) = parse_url_opts(table_root, options)?; + Ok(Self::new(Arc::new(object_store), task_executor)) } /// Create a new [`DefaultEngine`] instance /// /// # Parameters /// - /// - `store`: The object store to use. - /// - `table_root_path`: The root path of the table within storage. + /// - `object_store`: The object store to use. /// - `task_executor`: Used to spawn async IO tasks. See [executor::TaskExecutor]. - pub fn new(store: Arc, table_root: Path, task_executor: Arc) -> Self { + pub fn new(object_store: Arc, task_executor: Arc) -> Self { // HACK to check if we're using a LocalFileSystem from ObjectStore. We need this because // local filesystem doesn't return a sorted list by default. Although the `object_store` // crate explicitly says it _does not_ return a sorted listing, in practice all the cloud @@ -91,27 +89,29 @@ impl DefaultEngine { // in your Cloud Storage buckets, which are ordered in the list lexicographically by name." // So we just need to know if we're local and then if so, we sort the returned file list in // `filesystem.rs` - let store_str = format!("{}", store); + let store_str = format!("{}", object_store); let is_local = store_str.starts_with("LocalFileSystem"); Self { - file_system: Arc::new(ObjectStoreFileSystemClient::new( - store.clone(), + storage: Arc::new(ObjectStoreStorageHandler::new( + object_store.clone(), !is_local, - table_root, task_executor.clone(), )), json: Arc::new(DefaultJsonHandler::new( - store.clone(), + object_store.clone(), task_executor.clone(), )), - parquet: Arc::new(DefaultParquetHandler::new(store.clone(), task_executor)), - store, - expression: Arc::new(ArrowExpressionHandler {}), + parquet: Arc::new(DefaultParquetHandler::new( + object_store.clone(), + task_executor, + )), + object_store, + expression: Arc::new(ArrowEvaluationHandler {}), } } pub fn get_object_store_for_url(&self, _url: &Url) -> Option> { - Some(self.store.clone()) + Some(self.object_store.clone()) } pub async fn write_parquet( @@ -124,7 +124,7 @@ impl DefaultEngine { let transform = write_context.logical_to_physical(); let input_schema: Schema = data.record_batch().schema().try_into()?; let output_schema = write_context.schema(); - let logical_to_physical_expr = self.get_expression_handler().get_evaluator( + let logical_to_physical_expr = self.evaluation_handler().new_expression_evaluator( input_schema.into(), transform.clone(), output_schema.clone().into(), @@ -142,19 +142,90 @@ impl DefaultEngine { } impl Engine for DefaultEngine { - fn get_expression_handler(&self) -> Arc { + fn evaluation_handler(&self) -> Arc { self.expression.clone() } - fn get_file_system_client(&self) -> Arc { - self.file_system.clone() + fn storage_handler(&self) -> Arc { + self.storage.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { self.json.clone() } - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { self.parquet.clone() } } + +trait UrlExt { + // Check if a given url is a 
presigned url and can be used + // to access the object store via simple http requests + fn is_presigned(&self) -> bool; +} + +impl UrlExt for Url { + fn is_presigned(&self) -> bool { + matches!(self.scheme(), "http" | "https") + && ( + // https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + // https://developers.cloudflare.com/r2/api/s3/presigned-urls/ + self + .query_pairs() + .any(|(k, _)| k.eq_ignore_ascii_case("X-Amz-Signature")) || + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#version-2020-12-06-and-later + // note signed permission (sp) must always be present + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("sp")) || + // https://cloud.google.com/storage/docs/authentication/signatures + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("X-Goog-Credential")) || + // https://www.alibabacloud.com/help/en/oss/user-guide/upload-files-using-presigned-urls + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("X-OSS-Credential")) + ) + } +} + +#[cfg(test)] +mod tests { + use super::executor::tokio::TokioBackgroundExecutor; + use super::*; + use crate::engine::tests::test_arrow_engine; + use object_store::local::LocalFileSystem; + + #[test] + fn test_default_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = Url::from_directory_path(tmp.path()).unwrap(); + let object_store = Arc::new(LocalFileSystem::new()); + let engine = DefaultEngine::new(object_store, Arc::new(TokioBackgroundExecutor::new())); + test_arrow_engine(&engine, &url); + } + + #[test] + fn test_pre_signed_url() { + let url = Url::parse("https://example.com?X-Amz-Signature=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?sp=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?X-Goog-Credential=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?X-OSS-Credential=foo").unwrap(); + assert!(url.is_presigned()); + + // assert that query keys are case insensitive + let url = Url::parse("https://example.com?x-gooG-credenTIAL=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?x-oss-CREDENTIAL=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com").unwrap(); + assert!(!url.is_presigned()); + } +} diff --git a/kernel/src/engine/default/parquet.rs b/kernel/src/engine/default/parquet.rs index a65d329a2..8636b3d9f 100644 --- a/kernel/src/engine/default/parquet.rs +++ b/kernel/src/engine/default/parquet.rs @@ -4,21 +4,22 @@ use std::collections::HashMap; use std::ops::Range; use std::sync::Arc; -use arrow_array::builder::{MapBuilder, MapFieldNames, StringBuilder}; -use arrow_array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::arrow::array::builder::{MapBuilder, MapFieldNames, StringBuilder}; +use crate::arrow::array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::parquet::arrow::arrow_reader::{ + ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +}; +use crate::parquet::arrow::arrow_writer::ArrowWriter; +use crate::parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use futures::StreamExt; use object_store::path::Path; use object_store::DynObjectStore; -use parquet::arrow::arrow_reader::{ - ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, -}; -use parquet::arrow::arrow_writer::ArrowWriter; -use 
parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use uuid::Uuid; use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; +use super::UrlExt; use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::{generate_mask, get_requested_indices, reorder_struct_array}; +use crate::engine::arrow_utils::{fixup_parquet_read, generate_mask, get_requested_indices}; use crate::engine::default::executor::TaskExecutor; use crate::engine::parquet_row_group_skipping::ParquetRowGroupSkipping; use crate::schema::SchemaRef; @@ -191,18 +192,19 @@ impl ParquetHandler for DefaultParquetHandler { // -> reqwest to get data // -> parse to parquet // SAFETY: we did is_empty check above, this is ok. - let file_opener: Box = match files[0].location.scheme() { - "http" | "https" => Box::new(PresignedUrlOpener::new( + let file_opener: Box = if files[0].location.is_presigned() { + Box::new(PresignedUrlOpener::new( 1024, physical_schema.clone(), predicate, - )), - _ => Box::new(ParquetOpener::new( + )) + } else { + Box::new(ParquetOpener::new( 1024, physical_schema.clone(), predicate, self.store.clone(), - )), + )) }; FileStream::new_async_read_iterator( self.task_executor.clone(), @@ -281,12 +283,7 @@ impl FileOpener for ParquetOpener { let stream = builder.with_batch_size(batch_size).build()?; - let stream = stream.map(move |rbr| { - // re-order each batch if needed - rbr.map_err(Error::Parquet).and_then(|rb| { - reorder_struct_array(rb.into(), &requested_ordering).map(Into::into) - }) - }); + let stream = stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering)); Ok(stream.boxed()) })) } @@ -355,12 +352,7 @@ impl FileOpener for PresignedUrlOpener { let reader = builder.with_batch_size(batch_size).build()?; let stream = futures::stream::iter(reader); - let stream = stream.map(move |rbr| { - // re-order each batch if needed - rbr.map_err(Error::Arrow).and_then(|rb| { - reorder_struct_array(rb.into(), &requested_ordering).map(Into::into) - }) - }); + let stream = stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering)); Ok(stream.boxed()) })) } @@ -371,8 +363,7 @@ mod tests { use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; - use arrow_array::array::Array; - use arrow_array::RecordBatch; + use crate::arrow::array::{Array, RecordBatch}; use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; use url::Url; @@ -519,7 +510,7 @@ mod tests { .try_into() .unwrap(); - let filename = location.path().split('/').last().unwrap(); + let filename = location.path().split('/').next_back().unwrap(); assert_eq!(&expected_location.join(filename).unwrap(), location); assert_eq!(expected_size, size); assert!(now - last_modified < 10_000); diff --git a/kernel/src/engine/ensure_data_types.rs b/kernel/src/engine/ensure_data_types.rs index b6f186671..da699be07 100644 --- a/kernel/src/engine/ensure_data_types.rs +++ b/kernel/src/engine/ensure_data_types.rs @@ -5,7 +5,7 @@ use std::{ ops::Deref, }; -use arrow_schema::{DataType as ArrowDataType, Field as ArrowField}; +use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; use itertools::Itertools; use crate::{ @@ -256,7 +256,7 @@ fn metadata_eq( #[cfg(test)] mod tests { - use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, Fields}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField, Fields}; use crate::{ engine::ensure_data_types::ensure_data_types, @@ -276,8 +276,8 @@ mod tests { 
assert!(can_upcast_to_decimal(&Decimal128(5, 1), 6u8, 2i8)); assert!(can_upcast_to_decimal( &Decimal128(10, 5), - arrow_schema::DECIMAL128_MAX_PRECISION, - arrow_schema::DECIMAL128_MAX_SCALE - 5 + crate::arrow::datatypes::DECIMAL128_MAX_PRECISION, + crate::arrow::datatypes::DECIMAL128_MAX_SCALE - 5 )); assert!(can_upcast_to_decimal(&Int8, 3u8, 0i8)); diff --git a/kernel/src/engine/mod.rs b/kernel/src/engine/mod.rs index 8ea07384a..c58b882f7 100644 --- a/kernel/src/engine/mod.rs +++ b/kernel/src/engine/mod.rs @@ -27,3 +27,80 @@ pub(crate) mod arrow_get_data; pub(crate) mod ensure_data_types; #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] pub mod parquet_row_group_skipping; + +#[cfg(test)] +mod tests { + use itertools::Itertools; + use object_store::path::Path; + use std::sync::Arc; + use url::Url; + + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use crate::engine::arrow_data::ArrowEngineData; + use crate::{Engine, EngineData}; + + use test_utils::delta_path_for_version; + + fn test_list_from_should_sort_and_filter( + engine: &dyn Engine, + base_url: &Url, + engine_data: impl Fn() -> Box, + ) { + let json = engine.json_handler(); + let get_data = || Box::new(std::iter::once(Ok(engine_data()))); + + let expected_names: Vec = (1..4) + .map(|i| delta_path_for_version(i, "json")) + .collect_vec(); + + for i in expected_names.iter().rev() { + let path = base_url.join(i.as_ref()).unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + } + let path = base_url.join("other").unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + + let storage = engine.storage_handler(); + + // list files after an offset + let test_url = base_url.join(expected_names[0].as_ref()).unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len() - 1); + for (file, expected) in files.iter().zip(expected_names.iter().skip(1)) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + + let test_url = base_url + .join(delta_path_for_version(0, "json").as_ref()) + .unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + + // list files inside a directory / key prefix + let test_url = base_url.join("_delta_log/").unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + for (file, expected) in files.iter().zip(expected_names.iter()) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + } + + fn get_arrow_data() -> Box { + let schema = Arc::new(ArrowSchema::new(vec![Field::new( + "dog", + ArrowDataType::Utf8, + true, + )])); + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from(vec!["remi", "wilson"]))], + ) + .unwrap(); + Box::new(ArrowEngineData::new(data)) + } + + pub(crate) fn test_arrow_engine(engine: &dyn Engine, base_url: &Url) { + test_list_from_should_sort_and_filter(engine, base_url, get_arrow_data); + } +} diff --git a/kernel/src/engine/parquet_row_group_skipping.rs b/kernel/src/engine/parquet_row_group_skipping.rs index 79c87d923..2464ca455 100644 --- a/kernel/src/engine/parquet_row_group_skipping.rs +++ b/kernel/src/engine/parquet_row_group_skipping.rs @@ -1,15 +1,13 @@ //! 
An implementation of parquet row group skipping using data skipping predicates over footer stats. -use crate::expressions::{ - BinaryExpression, ColumnName, Expression, Scalar, UnaryExpression, VariadicExpression, -}; -use crate::predicates::parquet_stats_skipping::ParquetStatsProvider; +use crate::expressions::{ColumnName, Expression, Scalar}; +use crate::kernel_predicates::parquet_stats_skipping::ParquetStatsProvider; +use crate::parquet::arrow::arrow_reader::ArrowReaderBuilder; +use crate::parquet::file::metadata::RowGroupMetaData; +use crate::parquet::file::statistics::Statistics; +use crate::parquet::schema::types::ColumnDescPtr; use crate::schema::{DataType, PrimitiveType}; use chrono::{DateTime, Days}; -use parquet::arrow::arrow_reader::ArrowReaderBuilder; -use parquet::file::metadata::RowGroupMetaData; -use parquet::file::statistics::Statistics; -use parquet::schema::types::ColumnDescPtr; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use tracing::debug; #[cfg(test)] @@ -57,7 +55,7 @@ impl<'a> RowGroupFilter<'a> { /// Applies a filtering predicate to a row group. Return value false means to skip it. fn apply(row_group: &'a RowGroupMetaData, predicate: &Expression) -> bool { - use crate::predicates::PredicateEvaluator as _; + use crate::kernel_predicates::KernelPredicateEvaluator as _; RowGroupFilter::new(row_group, predicate).eval_sql_where(predicate) != Some(false) } @@ -225,35 +223,19 @@ pub(crate) fn compute_field_indices( fields: &[ColumnDescPtr], expression: &Expression, ) -> HashMap { - fn do_recurse(expression: &Expression, cols: &mut HashSet) { - use Expression::*; - let mut recurse = |expr| do_recurse(expr, cols); // simplifies the call sites below - match expression { - Literal(_) => {} - Column(name) => cols.extend([name.clone()]), // returns `()`, unlike `insert` - Struct(fields) => fields.iter().for_each(recurse), - Unary(UnaryExpression { expr, .. }) => recurse(expr), - Binary(BinaryExpression { left, right, .. }) => { - [left, right].iter().for_each(|e| recurse(e)) - } - Variadic(VariadicExpression { exprs, .. }) => exprs.iter().for_each(recurse), - } - } - // Build up a set of requested column paths, then take each found path as the corresponding map // key (avoids unnecessary cloning). // // NOTE: If a requested column was not available, it is silently ignored. These missing columns // are implied all-null, so we will infer their min/max stats as NULL and nullcount == rowcount. - let mut requested_columns = HashSet::new(); - do_recurse(expression, &mut requested_columns); + let mut requested_columns = expression.references(); fields .iter() .enumerate() .filter_map(|(i, f)| { requested_columns .take(f.path().parts()) - .map(|path| (path, i)) + .map(|path| (path.clone(), i)) }) .collect() } diff --git a/kernel/src/engine/parquet_row_group_skipping/tests.rs b/kernel/src/engine/parquet_row_group_skipping/tests.rs index 37a3bb1b0..1ad2208db 100644 --- a/kernel/src/engine/parquet_row_group_skipping/tests.rs +++ b/kernel/src/engine/parquet_row_group_skipping/tests.rs @@ -1,8 +1,8 @@ use super::*; use crate::expressions::{column_expr, column_name}; -use crate::predicates::DataSkippingPredicateEvaluator as _; +use crate::kernel_predicates::DataSkippingPredicateEvaluator as _; +use crate::parquet::arrow::arrow_reader::ArrowReaderMetadata; use crate::Expression; -use parquet::arrow::arrow_reader::ArrowReaderMetadata; use std::fs::File; /// Performs an exhaustive set of reads against a specially crafted parquet file. 
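As context for the `compute_field_indices` hunk above: the rewrite drops the hand-rolled recursive walk and instead asks the expression for its column references, then maps each parquet leaf column whose path was actually requested to its index in the footer schema, silently skipping requested columns the file does not contain. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `ColPath`, `compute_field_indices_sketch`, and the flat `parquet_leaf_paths` input are illustrative stand-ins, not the kernel's actual `ColumnName`/`ColumnDescPtr` API.

```rust
use std::collections::{HashMap, HashSet};

// Stand-in for a column path (the kernel uses `ColumnName`); purely illustrative.
type ColPath = Vec<String>;

// Simplified model of "collect referenced columns, then map each parquet leaf
// column whose path was requested to its index in the footer schema".
fn compute_field_indices_sketch(
    parquet_leaf_paths: &[ColPath],  // one entry per leaf column, in footer order
    referenced: &HashSet<ColPath>,   // what `expression.references()` would yield
) -> HashMap<ColPath, usize> {
    parquet_leaf_paths
        .iter()
        .enumerate()
        // A requested column missing from the file is silently skipped here; the
        // caller treats it as all-null (min/max = NULL, nullcount = rowcount).
        .filter(|(_, path)| referenced.contains(*path))
        .map(|(i, path)| (path.clone(), i))
        .collect()
}

fn main() {
    let leaves = vec![
        vec!["a".to_string()],
        vec!["nested".to_string(), "b".to_string()],
    ];
    let mut wanted = HashSet::new();
    wanted.insert(vec!["nested".to_string(), "b".to_string()]);
    wanted.insert(vec!["missing".to_string()]); // not in the file: silently ignored
    let index = compute_field_indices_sketch(&leaves, &wanted);
    assert_eq!(index.len(), 1);
    assert_eq!(index[&vec!["nested".to_string(), "b".to_string()]], 1);
}
```

The sketch only demonstrates the lookup shape; the real code works against `ColumnDescPtr` paths and reuses the found path as the map key to avoid unnecessary cloning.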
diff --git a/kernel/src/engine/sync/json.rs b/kernel/src/engine/sync/json.rs index 3d33b1025..f2212cb81 100644 --- a/kernel/src/engine/sync/json.rs +++ b/kernel/src/engine/sync/json.rs @@ -1,6 +1,7 @@ use std::{fs::File, io::BufReader, io::Write}; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; use tempfile::NamedTempFile; use url::Url; @@ -22,7 +23,7 @@ fn try_create_from_json( arrow_schema: ArrowSchemaRef, _predicate: Option, ) -> DeltaResult>> { - let json = arrow_json::ReaderBuilder::new(arrow_schema) + let json = ReaderBuilder::new(arrow_schema) .build(BufReader::new(file))? .map(|data| Ok(ArrowEngineData::new(data?))); Ok(json) @@ -65,6 +66,10 @@ impl JsonHandler for SyncJsonHandler { ))); }; + if !parent.exists() { + std::fs::create_dir_all(parent)?; + } + // write data to tmp file let mut tmp_file = NamedTempFile::new_in(parent)?; let buf = to_json_bytes(data)?; @@ -92,10 +97,8 @@ mod tests { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::DataType as ArrowDataType; - use arrow_schema::Field; - use arrow_schema::Schema as ArrowSchema; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use serde_json::json; use url::Url; diff --git a/kernel/src/engine/sync/mod.rs b/kernel/src/engine/sync/mod.rs index f637ec105..0c119396e 100644 --- a/kernel/src/engine/sync/mod.rs +++ b/kernel/src/engine/sync/mod.rs @@ -1,58 +1,58 @@ //! A simple, single threaded, [`Engine`] that can only read from the local filesystem -use super::arrow_expression::ArrowExpressionHandler; +use super::arrow_expression::ArrowEvaluationHandler; use crate::engine::arrow_data::ArrowEngineData; use crate::{ - DeltaResult, Engine, Error, ExpressionHandler, ExpressionRef, FileDataReadResultIterator, - FileMeta, FileSystemClient, JsonHandler, ParquetHandler, SchemaRef, + DeltaResult, Engine, Error, EvaluationHandler, ExpressionRef, FileDataReadResultIterator, + FileMeta, JsonHandler, ParquetHandler, SchemaRef, StorageHandler, }; -use arrow_schema::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; +use crate::arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use itertools::Itertools; use std::fs::File; use std::sync::Arc; use tracing::debug; -mod fs_client; pub(crate) mod json; mod parquet; +mod storage; /// This is a simple implementation of [`Engine`]. It only supports reading data from the local /// filesystem, and internally represents data using `Arrow`. 
pub struct SyncEngine { - fs_client: Arc, + storage_handler: Arc, json_handler: Arc, parquet_handler: Arc, - expression_handler: Arc, + evaluation_handler: Arc, } impl SyncEngine { #[allow(clippy::new_without_default)] pub fn new() -> Self { SyncEngine { - fs_client: Arc::new(fs_client::SyncFilesystemClient {}), + storage_handler: Arc::new(storage::SyncStorageHandler {}), json_handler: Arc::new(json::SyncJsonHandler {}), parquet_handler: Arc::new(parquet::SyncParquetHandler {}), - expression_handler: Arc::new(ArrowExpressionHandler {}), + evaluation_handler: Arc::new(ArrowEvaluationHandler {}), } } } impl Engine for SyncEngine { - fn get_expression_handler(&self) -> Arc { - self.expression_handler.clone() + fn evaluation_handler(&self) -> Arc { + self.evaluation_handler.clone() } - fn get_file_system_client(&self) -> Arc { - self.fs_client.clone() + fn storage_handler(&self) -> Arc { + self.storage_handler.clone() } /// Get the connector provided [`ParquetHandler`]. - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { self.parquet_handler.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { self.json_handler.clone() } } @@ -97,3 +97,17 @@ where .map(|data| Ok(Box::new(ArrowEngineData::new(data??.into())) as _)); Ok(Box::new(result)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::tests::test_arrow_engine; + + #[test] + fn test_sync_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = url::Url::from_directory_path(tmp.path()).unwrap(); + let engine = SyncEngine::new(); + test_arrow_engine(&engine, &url); + } +} diff --git a/kernel/src/engine/sync/parquet.rs b/kernel/src/engine/sync/parquet.rs index 260ef321b..48010af30 100644 --- a/kernel/src/engine/sync/parquet.rs +++ b/kernel/src/engine/sync/parquet.rs @@ -1,11 +1,11 @@ use std::fs::File; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; use super::read_files; use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::{generate_mask, get_requested_indices, reorder_struct_array}; +use crate::engine::arrow_utils::{fixup_parquet_read, generate_mask, get_requested_indices}; use crate::engine::parquet_row_group_skipping::ParquetRowGroupSkipping; use crate::schema::SchemaRef; use crate::{DeltaResult, ExpressionRef, FileDataReadResultIterator, FileMeta, ParquetHandler}; @@ -28,10 +28,8 @@ fn try_create_from_parquet( if let Some(predicate) = predicate { builder = builder.with_row_group_filter(predicate.as_ref()); } - Ok(builder.build()?.map(move |data| { - let reordered = reorder_struct_array(data?.into(), &requested_ordering)?; - Ok(ArrowEngineData::new(reordered.into())) - })) + let stream = builder.build()?; + Ok(stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering))) } impl ParquetHandler for SyncParquetHandler { diff --git a/kernel/src/engine/sync/fs_client.rs b/kernel/src/engine/sync/storage.rs similarity index 88% rename from kernel/src/engine/sync/fs_client.rs rename to kernel/src/engine/sync/storage.rs index 9577b1499..a2bd8f536 100644 --- a/kernel/src/engine/sync/fs_client.rs +++ b/kernel/src/engine/sync/storage.rs @@ -2,11 +2,11 @@ use bytes::Bytes; use itertools::Itertools; use url::Url; -use crate::{DeltaResult, Error, FileMeta, FileSlice, FileSystemClient}; +use 
crate::{DeltaResult, Error, FileMeta, FileSlice, StorageHandler}; -pub(crate) struct SyncFilesystemClient; +pub(crate) struct SyncStorageHandler; -impl FileSystemClient for SyncFilesystemClient { +impl StorageHandler for SyncStorageHandler { /// List the paths in the same directory that are lexicographically greater or equal to /// (UTF-8 sorting) the given `path`. The result is sorted by the file name. fn list_from( @@ -39,7 +39,7 @@ impl FileSystemClient for SyncFilesystemClient { let all_ents: Vec<_> = std::fs::read_dir(path_to_read)? .filter(|ent_res| { match (ent_res, min_file_name) { - (Ok(ent), Some(min_file_name)) => ent.file_name() >= *min_file_name, + (Ok(ent), Some(min_file_name)) => ent.file_name() > *min_file_name, _ => true, // Keep unfiltered and/or error entries } }) @@ -86,8 +86,8 @@ mod tests { use test_utils::abs_diff; - use super::SyncFilesystemClient; - use crate::FileSystemClient; + use super::SyncStorageHandler; + use crate::StorageHandler; /// generate json filenames that follow the spec (numbered padded to 20 chars) fn get_json_filename(index: usize) -> String { @@ -96,7 +96,7 @@ mod tests { #[test] fn test_file_meta_is_correct() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let begin_time = SystemTime::now().duration_since(UNIX_EPOCH)?; @@ -106,9 +106,9 @@ mod tests { writeln!(f, "null")?; f.flush()?; - let url_path = tmp_dir.path().join(get_json_filename(1)); + let url_path = tmp_dir.path().join(get_json_filename(0)); let url = Url::from_file_path(url_path).unwrap(); - let files: Vec<_> = client.list_from(&url)?.try_collect()?; + let files: Vec<_> = storage.list_from(&url)?.try_collect()?; assert!(!files.is_empty()); for meta in files.iter() { @@ -120,7 +120,7 @@ mod tests { #[test] fn test_list_from() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let mut expected = vec![]; for i in 0..3 { @@ -131,27 +131,27 @@ mod tests { } let url_path = tmp_dir.path().join(get_json_filename(1)); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; let mut file_count = 0; for (i, file) in list.enumerate() { // i+1 in index because we started at 0001 in the listing assert_eq!( file?.location.to_file_path().unwrap().to_str().unwrap(), - expected[i + 1].to_str().unwrap() + expected[i + 2].to_str().unwrap() ); file_count += 1; } - assert_eq!(file_count, 2); + assert_eq!(file_count, 1); let url_path = tmp_dir.path().join(""); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; file_count = list.count(); assert_eq!(file_count, 3); let url_path = tmp_dir.path().join(format!("{:020}", 1)); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; file_count = list.count(); assert_eq!(file_count, 2); Ok(()) @@ -159,14 +159,14 @@ mod tests { #[test] fn test_read_files() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let path = tmp_dir.path().join(get_json_filename(1)); let mut f = File::create(path.clone())?; writeln!(f, "null")?; let url = Url::from_file_path(path).unwrap(); let file_slice = (url.clone(), None); - let read = client.read_files(vec![file_slice])?; + let read = 
storage.read_files(vec![file_slice])?; let mut file_count = 0; let mut buf = BytesMut::with_capacity(16); buf.put(&b"null\n"[..]); diff --git a/kernel/src/engine_data.rs b/kernel/src/engine_data.rs index 333ced827..54cce0e26 100644 --- a/kernel/src/engine_data.rs +++ b/kernel/src/engine_data.rs @@ -7,6 +7,19 @@ use tracing::debug; use std::collections::HashMap; +/// Engine data paired with a selection vector indicating which rows are logically selected. +/// +/// A value of `true` in the selection vector means the corresponding row is selected (i.e., not deleted), +/// while `false` means the row is logically deleted and should be ignored. +/// +/// Interpreting unselected (`false`) rows will result in incorrect/undefined behavior. +pub struct FilteredEngineData { + // The underlying engine data + pub data: Box, + // The selection vector where `true` marks rows to include in results + pub selection_vector: Vec, +} + /// a trait that an engine exposes to give access to a list pub trait EngineList { /// Return the length of the list at the specified row_index in the raw data diff --git a/kernel/src/error.rs b/kernel/src/error.rs index 815ef3e51..80857b856 100644 --- a/kernel/src/error.rs +++ b/kernel/src/error.rs @@ -10,6 +10,9 @@ use crate::schema::{DataType, StructType}; use crate::table_properties::ParseIntervalError; use crate::Version; +#[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] +use crate::arrow::error::ArrowError; + /// A [`std::result::Result`] that has the kernel [`Error`] as the error variant pub type DeltaResult = std::result::Result; @@ -29,7 +32,7 @@ pub enum Error { /// An error performing operations on arrow data #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error(transparent)] - Arrow(arrow_schema::ArrowError), + Arrow(ArrowError), /// User tried to convert engine data to the wrong type #[error("Invalid engine data type. Could not convert to {0}")] @@ -58,10 +61,10 @@ pub enum Error { #[error("Internal error {0}. This is a kernel bug, please report.")] InternalError(String), - /// An error encountered while working with parquet data - #[cfg(feature = "parquet")] + /// An error enountered while working with parquet data + #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error("Arrow error: {0}")] - Parquet(#[from] parquet::errors::ParquetError), + Parquet(#[from] crate::parquet::errors::ParquetError), /// An error interacting with the object_store crate // We don't use [#from] object_store::Error here as our From impl transforms @@ -195,6 +198,12 @@ pub enum Error { /// Invalid checkpoint files #[error("Invalid Checkpoint: {0}")] InvalidCheckpoint(String), + + /// Error while transforming a schema + leaves into an Expression of literals + #[error(transparent)] + LiteralExpressionTransformError( + #[from] crate::expressions::literal_expression_transform::Error, + ), } // Convenience constructors for Error types that take a String argument @@ -304,8 +313,8 @@ from_with_backtrace!( ); #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] -impl From for Error { - fn from(value: arrow_schema::ArrowError) -> Self { +impl From for Error { + fn from(value: ArrowError) -> Self { Self::Arrow(value).with_backtrace() } } diff --git a/kernel/src/expressions/literal_expression_transform.rs b/kernel/src/expressions/literal_expression_transform.rs new file mode 100644 index 000000000..2d2276c11 --- /dev/null +++ b/kernel/src/expressions/literal_expression_transform.rs @@ -0,0 +1,517 @@ +//! 
The [`LiteralExpressionTransform`] is a [`SchemaTransform`] that transforms a [`Schema`] and an +//! ordered list of leaf values (scalars) into an [`Expression`] with a literal value for each leaf. + +use std::borrow::Cow; +use std::mem; + +use tracing::debug; + +use crate::expressions::{Expression, Scalar}; +use crate::schema::{ + ArrayType, DataType, MapType, PrimitiveType, SchemaTransform, StructField, StructType, +}; + +/// [`SchemaTransform`] that will transform a [`Schema`] and an ordered list of leaf values +/// (Scalars) into an Expression with a [`Literal`] expr for each leaf. +#[derive(Debug)] +pub(crate) struct LiteralExpressionTransform<'a, T: Iterator> { + /// Leaf values to insert in schema order. + scalars: T, + /// A stack of built Expressions. After visiting children, we pop them off to + /// build the parent container, then push the parent back on. + stack: Vec, + /// Since schema transforms are infallible we keep track of errors here + error: Result<(), Error>, +} + +/// Any error for [`LiteralExpressionTransform`] +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// Schema mismatch error + #[error("Schema error: {0}")] + Schema(String), + + /// Insufficient number of scalars (too many) to create a single-row expression + #[error("Excess scalar: {0} given for literal expression transform")] + ExcessScalars(Scalar), + + /// Insufficient number of scalars (too few) to create a single-row expression + #[error("Too few scalars given for literal expression transform")] + InsufficientScalars, + + /// Empty expression stack after performing the transform + #[error("No Expression was created after performing the transform")] + EmptyStack, + + /// Unsupported operation + #[error("Unsupported operation: {0}")] + Unsupported(String), +} + +impl<'a, I: Iterator> LiteralExpressionTransform<'a, I> { + pub(crate) fn new(scalars: impl IntoIterator) -> Self { + Self { + scalars: scalars.into_iter(), + stack: Vec::new(), + error: Ok(()), + } + } + + /// return the Expression we just built (or propagate Error). the top of `stack` should be our + /// final Expression + pub(crate) fn try_into_expr(mut self) -> Result { + self.error?; + + if let Some(s) = self.scalars.next() { + return Err(Error::ExcessScalars(s.clone())); + } + + self.stack.pop().ok_or(Error::EmptyStack) + } + + fn set_error(&mut self, error: Error) { + if let Err(e) = mem::replace(&mut self.error, Err(error)) { + debug!("Overwriting error that was already set: {e}"); + } + } +} + +impl<'a, T: Iterator> SchemaTransform<'a> for LiteralExpressionTransform<'a, T> { + fn transform_primitive( + &mut self, + prim_type: &'a PrimitiveType, + ) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + let Some(scalar) = self.scalars.next() else { + self.set_error(Error::InsufficientScalars); + return None; + }; + + let DataType::Primitive(scalar_type) = scalar.data_type() else { + self.set_error(Error::Schema( + "Non-primitive scalar type {datatype} provided".to_string(), + )); + return None; + }; + if scalar_type != *prim_type { + self.set_error(Error::Schema(format!( + "Mismatched scalar type while creating Expression: expected {}, got {}", + prim_type, scalar_type + ))); + return None; + } + + self.stack.push(Expression::Literal(scalar.clone())); + None + } + + fn transform_struct(&mut self, struct_type: &'a StructType) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + // Only consume newly-added entries (if any). 
There could be fewer than expected if + // the recursion encountered an error. + let mark = self.stack.len(); + self.recurse_into_struct(struct_type)?; + let field_exprs = self.stack.split_off(mark); + + if field_exprs.len() != struct_type.fields_len() { + self.set_error(Error::InsufficientScalars); + return None; + } + + let mut found_non_nullable_null = false; + let mut all_null = true; + let fields = struct_type.fields(); + for (field, expr) in fields.zip(&field_exprs) { + if !matches!(expr, Expression::Literal(Scalar::Null(_))) { + all_null = false; + } else if !field.is_nullable() { + found_non_nullable_null = true; + } + } + + // If all children are NULL and at least one is ostensibly non-nullable, we interpret + // the struct itself as being NULL (if all aren't null then it's an error) + let struct_expr = if found_non_nullable_null { + if !all_null { + // we found a non_nullable NULL, but other siblings are non-null: error + self.set_error(Error::Schema( + "NULL value for non-nullable struct field with non-NULL siblings".to_string(), + )); + return None; + } + Expression::null_literal(struct_type.clone().into()) + } else { + Expression::struct_from(field_exprs) + }; + + self.stack.push(struct_expr); + None + } + + fn transform_struct_field(&mut self, field: &'a StructField) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + self.recurse_into_struct_field(field); + Some(Cow::Borrowed(field)) + } + + // arrays unsupported for now + fn transform_array(&mut self, _array_type: &'a ArrayType) -> Option> { + self.error.as_ref().ok()?; + self.set_error(Error::Unsupported( + "ArrayType not yet supported in literal expression transform".to_string(), + )); + None + } + + // maps unsupported for now + fn transform_map(&mut self, _map_type: &'a MapType) -> Option> { + self.error.as_ref().ok()?; + self.set_error(Error::Unsupported( + "MapType not yet supported in literal expression transform".to_string(), + )); + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::sync::Arc; + + use crate::schema::SchemaRef; + use crate::schema::StructType; + use crate::DataType as DeltaDataTypes; + + use paste::paste; + + // helper to take values/schema to pass to `create_one` and assert the result = expected + fn assert_single_row_transform( + values: &[Scalar], + schema: SchemaRef, + expected: Result, + ) { + let mut schema_transform = LiteralExpressionTransform::new(values); + let datatype = schema.into(); + let _transformed = schema_transform.transform(&datatype); + match expected { + Ok(expected_expr) => { + let actual_expr = schema_transform.try_into_expr().unwrap(); + // TODO: we can't compare NULLs so we convert with .to_string to workaround + // see: https://github.com/delta-io/delta-kernel-rs/pull/677 + assert_eq!(expected_expr.to_string(), actual_expr.to_string()); + } + Err(()) => { + assert!(schema_transform.try_into_expr().is_err()); + } + } + } + + #[test] + fn test_create_one_top_level_null() { + let values = &[Scalar::Null(DeltaDataTypes::INTEGER)]; + + let schema = Arc::new(StructType::new([StructField::not_null( + "col_1", + DeltaDataTypes::INTEGER, + )])); + let expected = Expression::null_literal(schema.clone().into()); + assert_single_row_transform(values, schema, Ok(expected)); + + let schema = Arc::new(StructType::new([StructField::nullable( + "col_1", + DeltaDataTypes::INTEGER, + )])); + let expected = + Expression::struct_from(vec![Expression::null_literal(DeltaDataTypes::INTEGER)]); + 
assert_single_row_transform(values, schema, Ok(expected)); + } + + #[test] + fn test_create_one_missing_values() { + let values = &[1.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable("col_1", DeltaDataTypes::INTEGER), + StructField::nullable("col_2", DeltaDataTypes::INTEGER), + ])); + assert_single_row_transform(values, schema, Err(())); + } + + #[test] + fn test_create_one_extra_values() { + let values = &[1.into(), 2.into(), 3.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable("col_1", DeltaDataTypes::INTEGER), + StructField::nullable("col_2", DeltaDataTypes::INTEGER), + ])); + assert_single_row_transform(values, schema, Err(())); + } + + #[test] + fn test_create_one_incorrect_schema() { + let values = &["a".into()]; + let schema = Arc::new(StructType::new([StructField::nullable( + "col_1", + DeltaDataTypes::INTEGER, + )])); + assert_single_row_transform(values, schema, Err(())); + } + + // useful test to make sure that we correctly process the stack + #[test] + fn test_many_structs() { + let values: &[Scalar] = &[1.into(), 2.into(), 3.into(), 4.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable( + "x", + DeltaDataTypes::struct_type([ + StructField::not_null("a", DeltaDataTypes::INTEGER), + StructField::nullable("b", DeltaDataTypes::INTEGER), + ]), + ), + StructField::nullable( + "y", + DeltaDataTypes::struct_type([ + StructField::not_null("c", DeltaDataTypes::INTEGER), + StructField::nullable("d", DeltaDataTypes::INTEGER), + ]), + ), + ])); + let expected = Expression::struct_from(vec![ + Expression::struct_from(vec![Expression::literal(1), Expression::literal(2)]), + Expression::struct_from(vec![Expression::literal(3), Expression::literal(4)]), + ]); + assert_single_row_transform(values, schema, Ok(expected)); + } + + #[derive(Clone, Copy)] + struct TestSchema { + x_nullable: bool, + a_nullable: bool, + b_nullable: bool, + } + + enum Expected { + Noop, + NullStruct, + Null, + Error, // TODO: we could check the actual error + } + + fn run_test(test_schema: TestSchema, values: (Option, Option), expected: Expected) { + let (a_val, b_val) = values; + let a = match a_val { + Some(v) => Scalar::Integer(v), + None => Scalar::Null(DeltaDataTypes::INTEGER), + }; + let b = match b_val { + Some(v) => Scalar::Integer(v), + None => Scalar::Null(DeltaDataTypes::INTEGER), + }; + let values: &[Scalar] = &[a, b]; + + let field_a = StructField::new("a", DeltaDataTypes::INTEGER, test_schema.a_nullable); + let field_b = StructField::new("b", DeltaDataTypes::INTEGER, test_schema.b_nullable); + let field_x = StructField::new( + "x", + StructType::new([field_a.clone(), field_b.clone()]), + test_schema.x_nullable, + ); + let schema = Arc::new(StructType::new([field_x.clone()])); + + let expected_result = match expected { + Expected::Noop => { + let nested_struct = Expression::struct_from(vec![ + Expression::literal(values[0].clone()), + Expression::literal(values[1].clone()), + ]); + Ok(Expression::struct_from([nested_struct])) + } + Expected::Null => Ok(Expression::null_literal(schema.clone().into())), + Expected::NullStruct => { + let nested_null = Expression::null_literal(field_x.data_type().clone()); + Ok(Expression::struct_from([nested_null])) + } + Expected::Error => Err(()), + }; + + assert_single_row_transform(values, schema, expected_result); + } + + // helper to convert nullable/not_null to bool + macro_rules! 
bool_from_nullable { + (nullable) => { + true + }; + (not_null) => { + false + }; + } + + // helper to convert a/b/N to Some/Some/None (1 and 2 just arbitrary non-null ints) + macro_rules! parse_value { + (a) => { + Some(1) + }; + (b) => { + Some(2) + }; + (N) => { + None + }; + } + + macro_rules! test_nullability_combinations { + ( + name = $name:ident, + schema = { x: $x:ident, a: $a:ident, b: $b:ident }, + tests = { + ($ta1:tt, $tb1:tt) -> $expected1:ident, + ($ta2:tt, $tb2:tt) -> $expected2:ident, + ($ta3:tt, $tb3:tt) -> $expected3:ident, + ($ta4:tt, $tb4:tt) -> $expected4:ident $(,)? + } + ) => { + paste! { + #[test] + fn [<$name _ $ta1:lower _ $tb1:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta1), parse_value!($tb1)), Expected::$expected1); + } + #[test] + fn [<$name _ $ta2:lower _ $tb2:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta2), parse_value!($tb2)), Expected::$expected2); + } + #[test] + fn [<$name _ $ta3:lower _ $tb3:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta3), parse_value!($tb3)), Expected::$expected3); + } + #[test] + fn [<$name _ $ta4:lower _ $tb4:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta4), parse_value!($tb4)), Expected::$expected4); + } + } + } + } + + // Group 1: nullable { nullable, nullable } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> x (a, N) + // 4. (N, N) -> x (N, N) + test_nullability_combinations! { + name = test_all_nullable, + schema = { x: nullable, a: nullable, b: nullable }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Noop, + (N, N) -> Noop, + } + } + + // Group 2: nullable { nullable, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> Err + // 4. (N, N) -> x NULL + test_nullability_combinations! { + name = test_nullable_nullable_not_null, + schema = { x: nullable, a: nullable, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Error, + (N, N) -> NullStruct, + } + } + + // Group 3: nullable { not_null, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> Err + // 3. (a, N) -> Err + // 4. (N, N) -> x NULL + test_nullability_combinations! { + name = test_nullable_not_null_not_null, + schema = { x: nullable, a: not_null, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Error, + (a, N) -> Error, + (N, N) -> NullStruct, + } + } + + // Group 4: not_null { nullable, nullable } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> x (a, N) + // 4. (N, N) -> x (N, N) + test_nullability_combinations! { + name = test_not_null_nullable_nullable, + schema = { x: not_null, a: nullable, b: nullable }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Noop, + (N, N) -> Noop, + } + } + + // Group 5: not_null { nullable, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> Err + // 4. (N, N) -> NULL + test_nullability_combinations! 
{ + name = test_not_null_nullable_not_null, + schema = { x: not_null, a: nullable, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Error, + (N, N) -> Null, + } + } + + // Group 6: not_null { not_null, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> Err + // 3. (a, N) -> Err + // 4. (N, N) -> NULL + test_nullability_combinations! { + name = test_all_not_null, + schema = { x: not_null, a: not_null, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Error, + (a, N) -> Error, + (N, N) -> Null, + } + } +} diff --git a/kernel/src/expressions/mod.rs b/kernel/src/expressions/mod.rs index 9f4972408..b3de5c4c3 100644 --- a/kernel/src/expressions/mod.rs +++ b/kernel/src/expressions/mod.rs @@ -15,6 +15,8 @@ use crate::DataType; mod column_names; mod scalars; +pub(crate) mod literal_expression_transform; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// A binary operator. pub enum BinaryOperator { @@ -235,15 +237,9 @@ impl Display for Expression { impl Expression { /// Returns a set of columns referenced by this expression. pub fn references(&self) -> HashSet<&ColumnName> { - let mut set = HashSet::new(); - - for expr in self.walk() { - if let Self::Column(name) = expr { - set.insert(name); - } - } - - set + let mut references = GetColumnReferences::default(); + let _ = references.transform(self); + references.into_inner() } /// Create a new column name expression from input satisfying `FromIterator for ColumnName`. @@ -369,26 +365,6 @@ impl Expression { pub fn distinct(self, other: impl Into) -> Self { Self::binary(BinaryOperator::Distinct, self, other) } - - fn walk(&self) -> impl Iterator + '_ { - use Expression::*; - let mut stack = vec![self]; - std::iter::from_fn(move || { - let expr = stack.pop()?; - match expr { - Literal(_) => {} - Column { .. } => {} - Struct(exprs) => stack.extend(exprs), - Unary(UnaryExpression { expr, .. }) => stack.push(expr), - Binary(BinaryExpression { left, right, .. }) => { - stack.push(left); - stack.push(right); - } - Variadic(VariadicExpression { exprs, .. }) => stack.extend(exprs), - } - Some(expr) - }) - } } /// Generic framework for recursive bottom-up expression transforms. Transformations return @@ -604,6 +580,25 @@ impl> std::ops::Div for Expression { } } +/// Retrieves the set of column names referenced by an expression. +#[derive(Default)] +pub(crate) struct GetColumnReferences<'a> { + references: HashSet<&'a ColumnName>, +} + +impl<'a> GetColumnReferences<'a> { + pub(crate) fn into_inner(self) -> HashSet<&'a ColumnName> { + self.references + } +} + +impl<'a> ExpressionTransform<'a> for GetColumnReferences<'a> { + fn transform_column(&mut self, name: &'a ColumnName) -> Option> { + self.references.insert(name); + Some(Cow::Borrowed(name)) + } +} + /// An expression "transform" that doesn't actually change the expression at all. Instead, it /// measures the maximum depth of a expression, with a depth limit to prevent stack overflow. Useful /// for verifying that a expression has reasonable depth before attempting to work with it. 
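The `references()` rewrite delegates the tree walk to the transform framework via `GetColumnReferences`. Below is a minimal sketch of the resulting caller-side behavior, using only constructors that appear elsewhere in this patch (`column_expr!`, `Expression::literal`, `.distinct(..)`, `Expression::struct_from`); the exact import paths are assumptions.

```rust
// Sketch of a unit test inside the kernel crate (paths as used in this patch).
use std::collections::HashSet;

use crate::expressions::{column_expr, ColumnName, Expression};

#[test]
fn references_are_collected_by_transform() {
    // An expression that mentions columns `x` and `y`.
    let expr = Expression::struct_from(vec![
        column_expr!("x").distinct(Expression::literal(10)),
        column_expr!("y"),
    ]);

    // `references()` now runs GetColumnReferences::transform over the tree and
    // returns the distinct set of referenced column names.
    let refs: HashSet<&ColumnName> = expr.references();
    assert_eq!(refs.len(), 2);
}
```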
diff --git a/kernel/src/predicates/mod.rs b/kernel/src/kernel_predicates/mod.rs similarity index 81% rename from kernel/src/predicates/mod.rs rename to kernel/src/kernel_predicates/mod.rs index e47da293f..8ded4381a 100644 --- a/kernel/src/predicates/mod.rs +++ b/kernel/src/kernel_predicates/mod.rs @@ -55,7 +55,7 @@ mod tests; /// NOTE: The error-handling semantics of this trait's scalar-based predicate evaluation may differ /// from those of the engine's expression evaluation, because kernel expressions don't include the /// necessary type information to reliably detect all type errors. -pub(crate) trait PredicateEvaluator { +pub(crate) trait KernelPredicateEvaluator { type Output; /// A (possibly inverted) scalar NULL test, e.g. ` IS [NOT] NULL`. @@ -67,29 +67,11 @@ pub(crate) trait PredicateEvaluator { /// A (possibly inverted) NULL check, e.g. ` IS [NOT] NULL`. fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option; - /// A less-than comparison, e.g. ` < `. - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( < )` becomes ` <= `. - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option; - - /// A less-than-or-equal comparison, e.g. ` <= ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( <= )` becomes ` < `. - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option; + /// A (possibly inverted) less-than comparison, e.g. ` < `. + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option; - /// A greater-than comparison, e.g. ` > ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( > )` becomes ` >= `. - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option; - - /// A greater-than-or-equal comparison, e.g. ` >= ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( >= )` becomes ` > `. - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option; + /// A (possibly inverted) less-than-or-equal comparison, e.g. ` <= ` + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option; /// A (possibly inverted) equality comparison, e.g. ` = ` or ` != `. 
/// @@ -210,17 +192,17 @@ pub(crate) trait PredicateEvaluator { return None; } }; - match (op, inverted) { - (Plus | Minus | Multiply | Divide, _) => None, // Unsupported - not boolean output - (LessThan, false) | (GreaterThanOrEqual, true) => self.eval_lt(col, val), - (LessThanOrEqual, false) | (GreaterThan, true) => self.eval_le(col, val), - (GreaterThan, false) | (LessThanOrEqual, true) => self.eval_gt(col, val), - (GreaterThanOrEqual, false) | (LessThan, true) => self.eval_ge(col, val), - (Equal, _) => self.eval_eq(col, val, inverted), - (NotEqual, _) => self.eval_eq(col, val, !inverted), - (Distinct, _) => self.eval_distinct(col, val, inverted), - (In, _) => self.eval_in(col, val, inverted), - (NotIn, _) => self.eval_in(col, val, !inverted), + match op { + Plus | Minus | Multiply | Divide => None, // Unsupported - not boolean output + LessThan => self.eval_lt(col, val, inverted), + GreaterThanOrEqual => self.eval_lt(col, val, !inverted), + LessThanOrEqual => self.eval_le(col, val, inverted), + GreaterThan => self.eval_le(col, val, !inverted), + Equal => self.eval_eq(col, val, inverted), + NotEqual => self.eval_eq(col, val, !inverted), + Distinct => self.eval_distinct(col, val, inverted), + In => self.eval_in(col, val, inverted), + NotIn => self.eval_in(col, val, !inverted), } } @@ -426,16 +408,16 @@ pub(crate) trait PredicateEvaluator { } } -/// A collection of provided methods from the [`PredicateEvaluator`] trait, factored out to allow +/// A collection of provided methods from the [`KernelPredicateEvaluator`] trait, factored out to allow /// reuse by multiple bool-output predicate evaluator implementations. -pub(crate) struct PredicateEvaluatorDefaults; -impl PredicateEvaluatorDefaults { - /// Directly null-tests a scalar. See [`PredicateEvaluator::eval_scalar_is_null`]. +pub(crate) struct KernelPredicateEvaluatorDefaults; +impl KernelPredicateEvaluatorDefaults { + /// Directly null-tests a scalar. See [`KernelPredicateEvaluator::eval_scalar_is_null`]. pub(crate) fn eval_scalar_is_null(val: &Scalar, inverted: bool) -> Option { Some(val.is_null() != inverted) } - /// Directly evaluates a boolean scalar. See [`PredicateEvaluator::eval_scalar`]. + /// Directly evaluates a boolean scalar. See [`KernelPredicateEvaluator::eval_scalar`]. pub(crate) fn eval_scalar(val: &Scalar, inverted: bool) -> Option { match val { Scalar::Boolean(val) => Some(*val != inverted), @@ -456,7 +438,7 @@ impl PredicateEvaluatorDefaults { Some(matched != inverted) } - /// Directly evaluates a boolean comparison. See [`PredicateEvaluator::eval_binary_scalars`]. + /// Directly evaluates a boolean comparison. See [`KernelPredicateEvaluator::eval_binary_scalars`]. pub(crate) fn eval_binary_scalars( op: BinaryOperator, left: &Scalar, @@ -479,7 +461,7 @@ impl PredicateEvaluatorDefaults { } /// Finishes evaluating a (possibly inverted) variadic operation. See - /// [`PredicateEvaluator::finish_eval_variadic`]. + /// [`KernelPredicateEvaluator::finish_eval_variadic`]. /// /// The inputs were already inverted by the caller, if needed. /// @@ -511,7 +493,7 @@ impl PredicateEvaluatorDefaults { } } -/// Resolves columns as scalars, as a building block for [`DefaultPredicateEvaluator`]. +/// Resolves columns as scalars, as a building block for [`DefaultKernelPredicateEvaluator`]. pub(crate) trait ResolveColumnAsScalar { fn resolve_column(&self, col: &ColumnName) -> Option; } @@ -534,8 +516,6 @@ impl ResolveColumnAsScalar for EmptyColumnResolver { } } -// In testing, it is convenient to just build a hashmap of scalar values. 
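The rewritten dispatch folds `>` and `>=` into (possibly inverted) `eval_le` and `eval_lt` calls. The identities it relies on are easy to sanity-check in plain Rust; this is not kernel API, just the boolean algebra behind the `inverted` flag.

```rust
fn main() {
    for col in [1, 5, 10] {
        for val in [1, 5, 10] {
            // GreaterThanOrEqual is an inverted LessThan: NOT(col < val) == (col >= val)
            assert_eq!(col >= val, !(col < val));
            // GreaterThan is an inverted LessThanOrEqual: NOT(col <= val) == (col > val)
            assert_eq!(col > val, !(col <= val));
        }
    }
}
```

NULL or incomparable operands never reach these identities: the evaluators return `None` in that case, and `None` stays `None` regardless of `inverted`, which is why the flag is threaded down rather than negating results after the fact.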
-#[cfg(test)] impl ResolveColumnAsScalar for std::collections::HashMap { fn resolve_column(&self, col: &ColumnName) -> Option { self.get(col).cloned() @@ -544,17 +524,17 @@ impl ResolveColumnAsScalar for std::collections::HashMap { /// A predicate evaluator that directly evaluates the predicate to produce an `Option` /// result. Column resolution is handled by an embedded [`ResolveColumnAsScalar`] instance. -pub(crate) struct DefaultPredicateEvaluator { +pub(crate) struct DefaultKernelPredicateEvaluator { resolver: R, } -impl DefaultPredicateEvaluator { +impl DefaultKernelPredicateEvaluator { // Convenient thin wrapper fn resolve_column(&self, col: &ColumnName) -> Option { self.resolver.resolve_column(col) } } -impl From for DefaultPredicateEvaluator { +impl From for DefaultKernelPredicateEvaluator { fn from(resolver: R) -> Self { Self { resolver } } @@ -563,15 +543,15 @@ impl From for DefaultPredicateEvaluator PredicateEvaluator for DefaultPredicateEvaluator { +impl KernelPredicateEvaluator for DefaultKernelPredicateEvaluator { type Output = bool; fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -579,24 +559,14 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< self.eval_scalar_is_null(&col, inverted) } - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::LessThan, &col, val, false) - } - - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::LessThanOrEqual, &col, val, false) - } - - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::GreaterThan, &col, val, false) + self.eval_binary_scalars(BinaryOperator::LessThan, &col, val, inverted) } - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::GreaterThanOrEqual, &col, val, false) + self.eval_binary_scalars(BinaryOperator::LessThanOrEqual, &col, val, inverted) } fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { @@ -611,7 +581,7 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) } fn eval_binary_columns( @@ -632,7 +602,7 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< exprs: impl IntoIterator>, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) + KernelPredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) } } @@ -660,10 +630,10 @@ pub(crate) trait DataSkippingPredicateEvaluator { /// Retrieves the row count of a column (parquet footers always include this stat). 
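With the `#[cfg(test)]` gate removed, the `HashMap` resolver can back a `DefaultKernelPredicateEvaluator` outside of tests as well. A rough usage sketch of the internal API follows; the `.lt(..)` comparison builder is assumed to exist alongside the `.distinct(..)` builder shown earlier in this patch.

```rust
use std::collections::HashMap;

use crate::expressions::{column_expr, column_name, Expression, Scalar};
use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, KernelPredicateEvaluator as _};

fn sketch() {
    // Resolve column `x` to the scalar 7.
    let resolver = HashMap::from([(column_name!("x"), Scalar::from(7))]);
    let filter = DefaultKernelPredicateEvaluator::from(resolver);

    // `x < 10` resolves x through the HashMap and evaluates to true.
    let pred = column_expr!("x").lt(Expression::literal(10));
    assert_eq!(filter.eval_sql_where(&pred), Some(true));
}
```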
fn get_rowcount_stat(&self) -> Option; - /// See [`PredicateEvaluator::eval_scalar_is_null`] + /// See [`KernelPredicateEvaluator::eval_scalar_is_null`] fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option; - /// See [`PredicateEvaluator::eval_scalar`] + /// See [`KernelPredicateEvaluator::eval_scalar`] fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option; /// For IS NULL (IS NOT NULL), we can only skip the file if all-null (no-null). Any other @@ -675,7 +645,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { /// however, so the worst that can happen is we fail to skip an unnecessary file. fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option; - /// See [`PredicateEvaluator::eval_binary_scalars`] + /// See [`KernelPredicateEvaluator::eval_binary_scalars`] fn eval_binary_scalars( &self, op: BinaryOperator, @@ -684,7 +654,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { inverted: bool, ) -> Option; - /// See [`PredicateEvaluator::finish_eval_variadic`] + /// See [`KernelPredicateEvaluator::finish_eval_variadic`] fn finish_eval_variadic( &self, op: VariadicOperator, @@ -703,7 +673,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { ) -> Option; /// Performs a partial comparison against a column min-stat. See - /// [`PredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. + /// [`KernelPredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. fn partial_cmp_min_stat( &self, col: &ColumnName, @@ -716,7 +686,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { } /// Performs a partial comparison against a column max-stat. See - /// [`PredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. + /// [`KernelPredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. 
fn partial_cmp_max_stat( &self, col: &ColumnName, @@ -728,53 +698,51 @@ pub(crate) trait DataSkippingPredicateEvaluator { self.eval_partial_cmp(ord, max, val, inverted) } - /// See [`PredicateEvaluator::eval_lt`] - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col < val`: - // Skip if `val` is not greater than _all_ values in [min, max], implies - // Skip if `val <= min AND val <= max` implies - // Skip if `val <= min` implies - // Keep if `NOT(val <= min)` implies - // Keep if `val > min` implies - // Keep if `min < val` - self.partial_cmp_min_stat(col, val, Ordering::Less, false) - } - - /// See [`PredicateEvaluator::eval_le`] - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col <= val`: - // Skip if `val` is less than _all_ values in [min, max], implies - // Skip if `val < min AND val < max` implies - // Skip if `val < min` implies - // Keep if `NOT(val < min)` implies - // Keep if `NOT(min > val)` - self.partial_cmp_min_stat(col, val, Ordering::Greater, true) - } - - /// See [`PredicateEvaluator::eval_gt`] - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col > val`: - // Skip if `val` is not less than _all_ values in [min, max], implies - // Skip if `val >= min AND val >= max` implies - // Skip if `val >= max` implies - // Keep if `NOT(val >= max)` implies - // Keep if `NOT(max <= val)` implies - // Keep if `max > val` - self.partial_cmp_max_stat(col, val, Ordering::Greater, false) - } - - /// See [`PredicateEvaluator::eval_ge`] - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col >= val`: - // Skip if `val is greater than _every_ value in [min, max], implies - // Skip if `val > min AND val > max` implies - // Skip if `val > max` implies - // Keep if `NOT(val > max)` implies - // Keep if `NOT(max < val)` - self.partial_cmp_max_stat(col, val, Ordering::Less, true) - } - - /// See [`PredicateEvaluator::eval_ge`] + /// See [`KernelPredicateEvaluator::eval_lt`] + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + if inverted { + // Given `col >= val`: + // Skip if `val is greater than _every_ value in [min, max], implies + // Skip if `val > min AND val > max` implies + // Skip if `val > max` implies + // Keep if `NOT(val > max)` implies + // Keep if `NOT(max < val)` + self.partial_cmp_max_stat(col, val, Ordering::Less, true) + } else { + // Given `col < val`: + // Skip if `val` is not greater than _all_ values in [min, max], implies + // Skip if `val <= min AND val <= max` implies + // Skip if `val <= min` implies + // Keep if `NOT(val <= min)` implies + // Keep if `val > min` implies + // Keep if `min < val` + self.partial_cmp_min_stat(col, val, Ordering::Less, false) + } + } + + /// See [`KernelPredicateEvaluator::eval_le`] + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + if inverted { + // Given `col > val`: + // Skip if `val` is not less than _all_ values in [min, max], implies + // Skip if `val >= min AND val >= max` implies + // Skip if `val >= max` implies + // Keep if `NOT(val >= max)` implies + // Keep if `NOT(max <= val)` implies + // Keep if `max > val` + self.partial_cmp_max_stat(col, val, Ordering::Greater, false) + } else { + // Given `col <= val`: + // Skip if `val` is less than _all_ values in [min, max], implies + // Skip if `val < min AND val < max` implies + // Skip if `val < min` implies + // Keep if `NOT(val < min)` implies + // Keep if `NOT(min > val)` + self.partial_cmp_min_stat(col, val, 
Ordering::Greater, true) + } + } + + /// See [`KernelPredicateEvaluator::eval_ge`] fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let (op, exprs) = if inverted { // Column could compare not-equal if min or max value differs from the literal. @@ -795,7 +763,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { } } -impl PredicateEvaluator for T { +impl KernelPredicateEvaluator for T { type Output = T::Output; fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { @@ -810,20 +778,12 @@ impl PredicateEvaluator for T { self.eval_is_null(col, inverted) } - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_lt(col, val) - } - - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_le(col, val) - } - - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_gt(col, val) + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + self.eval_lt(col, val, inverted) } - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_ge(col, val) + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + self.eval_le(col, val, inverted) } fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { diff --git a/kernel/src/predicates/parquet_stats_skipping.rs b/kernel/src/kernel_predicates/parquet_stats_skipping.rs similarity index 86% rename from kernel/src/predicates/parquet_stats_skipping.rs rename to kernel/src/kernel_predicates/parquet_stats_skipping.rs index ff7536f40..492277d5a 100644 --- a/kernel/src/predicates/parquet_stats_skipping.rs +++ b/kernel/src/kernel_predicates/parquet_stats_skipping.rs @@ -1,6 +1,6 @@ //! An implementation of data skipping that leverages parquet stats from the file footer. 
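The min/max derivations in `eval_lt`/`eval_le` reduce to simple interval checks against the file's column statistics. A plain-Rust restatement over integer stats (not kernel API) makes the skip/keep outcomes easy to verify.

```rust
use std::cmp::Ordering;

/// `col < val`: keep the file iff `min < val`
/// (mirrors partial_cmp_min_stat(col, val, Ordering::Less, false)).
fn keep_for_lt(min: i64, _max: i64, val: i64) -> bool {
    min.cmp(&val) == Ordering::Less
}

/// Inverted case, `col >= val`: keep the file iff `NOT(max < val)`
/// (mirrors partial_cmp_max_stat(col, val, Ordering::Less, true)).
fn keep_for_ge(_min: i64, max: i64, val: i64) -> bool {
    max.cmp(&val) != Ordering::Less
}

fn main() {
    // File stats: min = 10, max = 20.
    assert!(keep_for_lt(10, 20, 15)); // some rows may satisfy col < 15
    assert!(!keep_for_lt(10, 20, 10)); // no row can satisfy col < 10: skip
    assert!(keep_for_ge(10, 20, 20)); // a row equal to max satisfies col >= 20
    assert!(!keep_for_ge(10, 20, 21)); // every row is below 21: skip
}
```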
use crate::expressions::{BinaryOperator, ColumnName, Scalar, VariadicOperator}; -use crate::predicates::{DataSkippingPredicateEvaluator, PredicateEvaluatorDefaults}; +use crate::kernel_predicates::{DataSkippingPredicateEvaluator, KernelPredicateEvaluatorDefaults}; use crate::schema::DataType; use std::cmp::Ordering; @@ -57,15 +57,15 @@ impl DataSkippingPredicateEvaluator for T { val: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::partial_cmp_scalars(ord, &col, val, inverted) + KernelPredicateEvaluatorDefaults::partial_cmp_scalars(ord, &col, val, inverted) } fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -83,7 +83,7 @@ impl DataSkippingPredicateEvaluator for T { right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) } fn finish_eval_variadic( @@ -92,6 +92,6 @@ impl DataSkippingPredicateEvaluator for T { exprs: impl IntoIterator>, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) + KernelPredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) } } diff --git a/kernel/src/predicates/parquet_stats_skipping/tests.rs b/kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs similarity index 99% rename from kernel/src/predicates/parquet_stats_skipping/tests.rs rename to kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs index 3eeb49758..8b7e1ffa7 100644 --- a/kernel/src/predicates/parquet_stats_skipping/tests.rs +++ b/kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::expressions::{column_expr, Expression as Expr}; -use crate::predicates::PredicateEvaluator as _; +use crate::kernel_predicates::KernelPredicateEvaluator as _; use crate::DataType; const TRUE: Option = Some(true); diff --git a/kernel/src/predicates/tests.rs b/kernel/src/kernel_predicates/tests.rs similarity index 95% rename from kernel/src/predicates/tests.rs rename to kernel/src/kernel_predicates/tests.rs index fdeda8305..5c498b020 100644 --- a/kernel/src/predicates/tests.rs +++ b/kernel/src/kernel_predicates/tests.rs @@ -43,7 +43,7 @@ fn test_default_eval_scalar() { ]; for (value, inverted, expect) in test_cases.into_iter() { assert_eq!( - PredicateEvaluatorDefaults::eval_scalar(&value, inverted), + KernelPredicateEvaluatorDefaults::eval_scalar(&value, inverted), expect, "value: {value:?} inverted: {inverted}" ); @@ -100,7 +100,7 @@ fn test_default_partial_cmp_scalars() { ]; // scalars of different types are always incomparable - let compare = PredicateEvaluatorDefaults::partial_cmp_scalars; + let compare = KernelPredicateEvaluatorDefaults::partial_cmp_scalars; for (i, a) in smaller_values.iter().enumerate() { for b in smaller_values.iter().skip(i + 1) { for op in [Less, Equal, Greater] { @@ -193,7 +193,7 @@ fn test_eval_binary_scalars() { let smaller_value = Scalar::Long(1); let larger_value = Scalar::Long(10); for inverted in [true, false] { - let compare = PredicateEvaluatorDefaults::eval_binary_scalars; + let compare 
= KernelPredicateEvaluatorDefaults::eval_binary_scalars; expect_eq!( compare(Equal, &smaller_value, &smaller_value, inverted), Some(!inverted), @@ -269,7 +269,7 @@ fn test_eval_binary_columns() { (column_name!("x"), Scalar::from(1)), (column_name!("y"), Scalar::from(10)), ]); - let filter = DefaultPredicateEvaluator::from(columns); + let filter = DefaultKernelPredicateEvaluator::from(columns); let x = column_expr!("x"); let y = column_expr!("y"); for inverted in [true, false] { @@ -307,7 +307,7 @@ fn test_eval_variadic() { (&[Some(false), Some(true), None], Some(false), Some(true)), (&[Some(true), Some(false), None], Some(false), Some(true)), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (inputs, expect_and, expect_or) in test_cases.iter() { let inputs: Vec<_> = inputs .iter() @@ -343,7 +343,7 @@ fn test_eval_column() { ]; let col = &column_name!("x"); for (input, expect) in &test_cases { - let filter = DefaultPredicateEvaluator::from(input.clone()); + let filter = DefaultKernelPredicateEvaluator::from(input.clone()); for inverted in [true, false] { expect_eq!( filter.eval_column(col, inverted), @@ -362,7 +362,7 @@ fn test_eval_not() { (Scalar::Null(DataType::BOOLEAN), None), (Scalar::Long(1), None), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (input, expect) in test_cases { let input = input.into(); for inverted in [true, false] { @@ -378,7 +378,7 @@ fn test_eval_not() { #[test] fn test_eval_is_null() { let expr = column_expr!("x"); - let filter = DefaultPredicateEvaluator::from(Scalar::from(1)); + let filter = DefaultKernelPredicateEvaluator::from(Scalar::from(1)); expect_eq!( filter.eval_unary(UnaryOperator::IsNull, &expr, true), Some(true), @@ -408,7 +408,7 @@ fn test_eval_distinct() { let one = &Scalar::from(1); let two = &Scalar::from(2); let null = &Scalar::Null(DataType::INTEGER); - let filter = DefaultPredicateEvaluator::from(one.clone()); + let filter = DefaultKernelPredicateEvaluator::from(one.clone()); let col = &column_name!("x"); expect_eq!( filter.eval_distinct(col, one, true), @@ -441,7 +441,7 @@ fn test_eval_distinct() { "DISTINCT(x, NULL) (x = 1)" ); - let filter = DefaultPredicateEvaluator::from(null.clone()); + let filter = DefaultKernelPredicateEvaluator::from(null.clone()); expect_eq!( filter.eval_distinct(col, one, true), Some(false), @@ -470,7 +470,7 @@ fn test_eval_distinct() { fn eval_binary() { let col = column_expr!("x"); let val = Expression::literal(10); - let filter = DefaultPredicateEvaluator::from(Scalar::from(1)); + let filter = DefaultKernelPredicateEvaluator::from(Scalar::from(1)); // unsupported expect_eq!( @@ -585,8 +585,8 @@ fn test_sql_where() { const NULL: Expr = Expr::Literal(Scalar::Null(DataType::BOOLEAN)); const FALSE: Expr = Expr::Literal(Scalar::Boolean(false)); const TRUE: Expr = Expr::Literal(Scalar::Boolean(true)); - let null_filter = DefaultPredicateEvaluator::from(NullColumnResolver); - let empty_filter = DefaultPredicateEvaluator::from(EmptyColumnResolver); + let null_filter = DefaultKernelPredicateEvaluator::from(NullColumnResolver); + let empty_filter = DefaultKernelPredicateEvaluator::from(EmptyColumnResolver); // Basic sanity check expect_eq!(null_filter.eval_sql_where(&VAL), None, "WHERE {VAL}"); diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 8dde21afe..d6797ba51 100644 --- 
a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -35,15 +35,15 @@ //! //! ## Expression handling //! -//! Expression handling is done via the [`ExpressionHandler`], which in turn allows the creation of +//! Expression handling is done via the [`EvaluationHandler`], which in turn allows the creation of //! [`ExpressionEvaluator`]s. These evaluators are created for a specific predicate [`Expression`] //! and allow evaluation of that predicate for a specific batches of data. //! //! ## File system interactions //! //! Delta Kernel needs to perform some basic operations against file systems like listing and -//! reading files. These interactions are encapsulated in the [`FileSystemClient`] trait. -//! Implementors must take care that all assumptions on the behavior if the functions - like sorted +//! reading files. These interactions are encapsulated in the [`StorageHandler`] trait. +//! Implementers must take care that all assumptions on the behavior if the functions - like sorted //! results - are respected. //! //! ## Reading log and data files @@ -51,7 +51,7 @@ //! Delta Kernel requires the capability to read and write json files and read parquet files, which //! is exposed via the [`JsonHandler`] and [`ParquetHandler`] respectively. When reading files, //! connectors are asked to provide the context information it requires to execute the actual -//! operation. This is done by invoking methods on the [`FileSystemClient`] trait. +//! operation. This is done by invoking methods on the [`StorageHandler`] trait. #![cfg_attr(all(doc, NIGHTLY_CHANNEL), feature(doc_auto_cfg))] #![warn( @@ -74,6 +74,7 @@ use url::Url; use self::schema::{DataType, SchemaRef}; pub mod actions; +mod checkpoint; pub mod engine_data; pub mod error; pub mod expressions; @@ -87,7 +88,9 @@ pub mod table_features; pub mod table_properties; pub mod transaction; -pub(crate) mod predicates; +pub mod arrow; +pub(crate) mod kernel_predicates; +pub mod parquet; pub(crate) mod utils; #[cfg(feature = "developer-visibility")] @@ -95,6 +98,11 @@ pub mod path; #[cfg(not(feature = "developer-visibility"))] pub(crate) mod path; +#[cfg(feature = "developer-visibility")] +pub mod log_replay; +#[cfg(not(feature = "developer-visibility"))] +pub(crate) mod log_replay; + #[cfg(feature = "developer-visibility")] pub mod log_segment; #[cfg(not(feature = "developer-visibility"))] @@ -106,6 +114,10 @@ pub use error::{DeltaResult, Error}; pub use expressions::{Expression, ExpressionRef}; pub use table::Table; +use expressions::literal_expression_transform::LiteralExpressionTransform; +use expressions::Scalar; +use schema::{SchemaTransform, StructField, StructType}; + #[cfg(any( feature = "default-engine", feature = "sync-engine", @@ -320,7 +332,7 @@ pub trait ExpressionEvaluator: AsAny { /// /// Delta Kernel can use this handler to evaluate predicate on partition filters, /// fill up partition column values and any computation on data using Expressions. -pub trait ExpressionHandler: AsAny { +pub trait EvaluationHandler: AsAny { /// Create an [`ExpressionEvaluator`] that can evaluate the given [`Expression`] /// on columnar batches with the given [`Schema`] to produce data of [`DataType`]. 
/// @@ -332,22 +344,61 @@ pub trait ExpressionHandler: AsAny { /// /// [`Schema`]: crate::schema::StructType /// [`DataType`]: crate::schema::DataType - fn get_evaluator( + fn new_expression_evaluator( &self, schema: SchemaRef, expression: Expression, output_type: DataType, ) -> Arc; + + /// Create a single-row all-null-value [`EngineData`] with the schema specified by + /// `output_schema`. + // NOTE: we should probably allow DataType instead of SchemaRef, but can expand that in the + // future. + fn null_row(&self, output_schema: SchemaRef) -> DeltaResult>; } +/// Internal trait to allow us to have a private `create_one` API that's implemented for all +/// EvaluationHandlers. +// For some reason rustc doesn't detect it's usage so we allow(dead_code) here... +#[allow(dead_code)] +trait EvaluationHandlerExtension: EvaluationHandler { + /// Create a single-row [`EngineData`] by applying the given schema to the leaf-values given in + /// `values`. + // Note: we will stick with a Schema instead of DataType (more constrained can expand in + // future) + fn create_one(&self, schema: SchemaRef, values: &[Scalar]) -> DeltaResult> { + // just get a single int column (arbitrary) + let null_row_schema = Arc::new(StructType::new(vec![StructField::nullable( + "null_col", + DataType::INTEGER, + )])); + let null_row = self.null_row(null_row_schema.clone())?; + + // Convert schema and leaf values to an expression + let mut schema_transform = LiteralExpressionTransform::new(values); + schema_transform.transform_struct(schema.as_ref()); + let row_expr = schema_transform.try_into_expr()?; + + let eval = self.new_expression_evaluator(null_row_schema, row_expr, schema.into()); + eval.evaluate(null_row.as_ref()) + } +} + +// Auto-implement the extension trait for all EvaluationHandlers +impl EvaluationHandlerExtension for T {} + /// Provides file system related functionalities to Delta Kernel. /// -/// Delta Kernel uses this client whenever it needs to access the underlying +/// Delta Kernel uses this handler whenever it needs to access the underlying /// file system where the Delta table is present. Connector implementation of /// this trait can hide filesystem specific details from Delta Kernel. -pub trait FileSystemClient: AsAny { - /// List the paths in the same directory that are lexicographically greater or equal to +pub trait StorageHandler: AsAny { + /// List the paths in the same directory that are lexicographically greater than /// (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + /// + /// If the path is directory-like (ends with '/'), the result should contain + /// all the files in the directory. fn list_from(&self, path: &Url) -> DeltaResult>>>; @@ -360,7 +411,7 @@ pub trait FileSystemClient: AsAny { /// Provides JSON handling functionality to Delta Kernel. /// -/// Delta Kernel can use this client to parse JSON strings into Row or read content from JSON files. +/// Delta Kernel can use this handler to parse JSON strings into Row or read content from JSON files. /// Connectors can leverage this trait to provide their best implementation of the JSON parsing /// capability to Delta Kernel. pub trait JsonHandler: AsAny { @@ -457,17 +508,17 @@ pub trait ParquetHandler: AsAny { /// Engines/Connectors are expected to pass an implementation of this trait when reading a Delta /// table. pub trait Engine: AsAny { - /// Get the connector provided [`ExpressionHandler`]. - fn get_expression_handler(&self) -> Arc; + /// Get the connector provided [`EvaluationHandler`]. 
+ fn evaluation_handler(&self) -> Arc; - /// Get the connector provided [`FileSystemClient`] - fn get_file_system_client(&self) -> Arc; + /// Get the connector provided [`StorageHandler`] + fn storage_handler(&self) -> Arc; /// Get the connector provided [`JsonHandler`]. - fn get_json_handler(&self) -> Arc; + fn json_handler(&self) -> Arc; /// Get the connector provided [`ParquetHandler`]. - fn get_parquet_handler(&self) -> Arc; + fn parquet_handler(&self) -> Arc; } // we have an 'internal' feature flag: default-engine-base, which is actually just the shared diff --git a/kernel/src/log_replay.rs b/kernel/src/log_replay.rs new file mode 100644 index 000000000..c9a58492f --- /dev/null +++ b/kernel/src/log_replay.rs @@ -0,0 +1,308 @@ +//! This module provides log replay utilities. +//! +//! Log replay is the process of transforming an iterator of action batches (read from Delta +//! transaction logs) into an iterator of filtered/transformed actions for specific use cases. +//! The logs, which record all table changes as JSON entries, are processed batch by batch, +//! typically from newest to oldest. +//! +//! Log replay is currently implemented for table scans, which filter and apply transformations +//! to produce file actions which builds the view of the table state at a specific point in time. +//! Future extensions will support additional log replay processors beyond the current use case. +//! (e.g. checkpointing: filter actions to include only those needed to rebuild table state) +//! +//! This module provides structures for efficient batch processing, focusing on file action +//! deduplication with `FileActionDeduplicator` which tracks unique files across log batches +//! to minimize memory usage for tables with extensive history. + +use std::collections::HashSet; + +use crate::actions::deletion_vector::DeletionVectorDescriptor; +use crate::engine_data::{GetData, TypedGetData}; +use crate::scan::data_skipping::DataSkippingFilter; +use crate::{DeltaResult, EngineData}; + +use tracing::debug; + +/// The subset of file action fields that uniquely identifies it in the log, used for deduplication +/// of adds and removes during log replay. +#[derive(Debug, Hash, Eq, PartialEq)] +pub(crate) struct FileActionKey { + pub(crate) path: String, + pub(crate) dv_unique_id: Option, +} +impl FileActionKey { + pub(crate) fn new(path: impl Into, dv_unique_id: Option) -> Self { + let path = path.into(); + Self { path, dv_unique_id } + } +} + +/// Maintains state and provides functionality for deduplicating file actions during log replay. +/// +/// This struct is embedded in visitors to track which files have been seen across multiple +/// log batches. Since logs are processed newest-to-oldest, this deduplicator ensures that each +/// unique file (identified by path and deletion vector ID) is processed only once. Performing +/// deduplication at the visitor level avoids having to load all actions into memory at once, +/// significantly reducing memory usage for large Delta tables with extensive history. +/// +/// TODO: Modify deduplication to track only file paths instead of (path, dv_unique_id). +/// More info here: https://github.com/delta-io/delta-kernel-rs/issues/701 +pub(crate) struct FileActionDeduplicator<'seen> { + /// A set of (data file path, dv_unique_id) pairs that have been seen thus + /// far in the log for deduplication. This is a mutable reference to the set + /// of seen file keys that persists across multiple log batches. 
+ seen_file_keys: &'seen mut HashSet, + // TODO: Consider renaming to `is_commit_batch`, `deduplicate_batch`, or `save_batch` + // to better reflect its role in deduplication logic. + /// Whether we're processing a log batch (as opposed to a checkpoint) + is_log_batch: bool, + /// Index of the getter containing the add.path column + add_path_index: usize, + /// Index of the getter containing the remove.path column + remove_path_index: usize, + /// Starting index for add action deletion vector columns + add_dv_start_index: usize, + /// Starting index for remove action deletion vector columns + remove_dv_start_index: usize, +} + +impl<'seen> FileActionDeduplicator<'seen> { + pub(crate) fn new( + seen_file_keys: &'seen mut HashSet, + is_log_batch: bool, + add_path_index: usize, + remove_path_index: usize, + add_dv_start_index: usize, + remove_dv_start_index: usize, + ) -> Self { + Self { + seen_file_keys, + is_log_batch, + add_path_index, + remove_path_index, + add_dv_start_index, + remove_dv_start_index, + } + } + + /// Checks if log replay already processed this logical file (in which case the current action + /// should be ignored). If not already seen, register it so we can recognize future duplicates. + /// Returns `true` if we have seen the file and should ignore it, `false` if we have not seen it + /// and should process it. + pub(crate) fn check_and_record_seen(&mut self, key: FileActionKey) -> bool { + // Note: each (add.path + add.dv_unique_id()) pair has a + // unique Add + Remove pair in the log. For example: + // https://github.com/delta-io/delta/blob/master/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json + + if self.seen_file_keys.contains(&key) { + debug!( + "Ignoring duplicate ({}, {:?}) in scan, is log {}", + key.path, key.dv_unique_id, self.is_log_batch + ); + true + } else { + debug!( + "Including ({}, {:?}) in scan, is log {}", + key.path, key.dv_unique_id, self.is_log_batch + ); + if self.is_log_batch { + // Remember file actions from this batch so we can ignore duplicates as we process + // batches from older commit and/or checkpoint files. We don't track checkpoint + // batches because they are already the oldest actions and never replace anything. + self.seen_file_keys.insert(key); + } + false + } + } + + /// Extracts the deletion vector unique ID if it exists. + /// + /// This function retrieves the necessary fields for constructing a deletion vector unique ID + /// by accessing `getters` at `dv_start_index` and the following two indices. Specifically: + /// - `dv_start_index` retrieves the storage type (`deletionVector.storageType`). + /// - `dv_start_index + 1` retrieves the path or inline deletion vector (`deletionVector.pathOrInlineDv`). + /// - `dv_start_index + 2` retrieves the optional offset (`deletionVector.offset`). + fn extract_dv_unique_id<'a>( + &self, + i: usize, + getters: &[&'a dyn GetData<'a>], + dv_start_index: usize, + ) -> DeltaResult> { + match getters[dv_start_index].get_opt(i, "deletionVector.storageType")? { + Some(storage_type) => { + let path_or_inline = + getters[dv_start_index + 1].get(i, "deletionVector.pathOrInlineDv")?; + let offset = getters[dv_start_index + 2].get_opt(i, "deletionVector.offset")?; + + Ok(Some(DeletionVectorDescriptor::unique_id_from_parts( + storage_type, + path_or_inline, + offset, + ))) + } + None => Ok(None), + } + } + + /// Extracts a file action key and determines if it's an add operation. 
+ /// This method examines the data at the given index using the provided getters + /// to identify whether a file action exists and what type it is. + /// + /// # Arguments + /// + /// * `i` - Index position in the data structure to examine + /// * `getters` - Collection of data getter implementations used to access the data + /// * `skip_removes` - Whether to skip remove actions when extracting file actions + /// + /// # Returns + /// + /// * `Ok(Some((key, is_add)))` - When a file action is found, returns the key and whether it's an add operation + /// * `Ok(None)` - When no file action is found + /// * `Err(...)` - On any error during extraction + pub(crate) fn extract_file_action<'a>( + &self, + i: usize, + getters: &[&'a dyn GetData<'a>], + skip_removes: bool, + ) -> DeltaResult> { + // Try to extract an add action by the required path column + if let Some(path) = getters[self.add_path_index].get_str(i, "add.path")? { + let dv_unique_id = self.extract_dv_unique_id(i, getters, self.add_dv_start_index)?; + return Ok(Some((FileActionKey::new(path, dv_unique_id), true))); + } + + // The AddRemoveDedupVisitor skips remove actions when extracting file actions from a checkpoint batch. + if skip_removes { + return Ok(None); + } + + // Try to extract a remove action by the required path column + if let Some(path) = getters[self.remove_path_index].get_str(i, "remove.path")? { + let dv_unique_id = self.extract_dv_unique_id(i, getters, self.remove_dv_start_index)?; + return Ok(Some((FileActionKey::new(path, dv_unique_id), false))); + } + + // No file action found + Ok(None) + } + + /// Returns whether we are currently processing a log batch. + /// + /// `true` indicates we are processing a batch from a commit file. + /// `false` indicates we are processing a batch from a checkpoint. + pub(crate) fn is_log_batch(&self) -> bool { + self.is_log_batch + } +} + +/// A trait for processing batches of actions from Delta transaction logs during log replay. +/// +/// Log replay processors scan transaction logs in **reverse chronological order** (newest to oldest), +/// filtering and transforming action batches into specialized output types. These processors: +/// +/// - **Track and deduplicate file actions** to apply appropriate `Remove` actions to corresponding +/// `Add` actions (and omit the file from the log replay output) +/// - **Maintain selection vectors** to indicate which actions in each batch should be included. +/// - **Apply custom filtering logic** based on the processorโ€™s purpose (e.g., checkpointing, scanning). +/// - **Data skipping** filters are applied to the initial selection vector to reduce the number of rows +/// processed by the processor, (if a filter is provided). +/// +/// Implementations: +/// - `ScanLogReplayProcessor`: Used for table scans, this processor filters and selects deduplicated +/// `Add` actions from log batches to reconstruct the view of the table at a specific point in time. +/// Note that scans do not expose `Remove` actions. Data skipping may be applied when a predicate is +/// provided. +/// +/// - `CheckpointLogReplayProcessor` (WIP): Will be responsible for processing log batches to construct +/// V1 spec checkpoint files. Unlike scans, checkpoint processing includes additional actions, such as +/// `Remove`, `Metadata`, and `Protocol`, required to fully reconstruct table state. +/// Data skipping is not applied during checkpoint processing. 
+/// +/// The `Output` type represents the material result of log replay, and it must implement the +/// `HasSelectionVector` trait to allow filtering of irrelevant rows: +/// +/// - For **scans**, the output type is `ScanMetadata`, which contains the file actions (`Add` +/// actions) that need to be applied to build the table's view, accompanied by a +/// **selection vector** that identifies which rows should be included. A transform vector may +/// also be included to handle schema changes, such as renaming columns or modifying data types. +/// +/// - For **checkpoints**, the output includes the actions necessary to write to the checkpoint file (`Add`, +/// `Remove`, `Metadata`, `Protocol` actions), filtered by the **selection vector** to determine which +/// rows are included in the final checkpoint. +/// +/// TODO: Refactor the Change Data Feed (CDF) processor to use this trait. +pub(crate) trait LogReplayProcessor: Sized { + /// The type of results produced by this processor must implement the + /// `HasSelectionVector` trait to allow filtering out batches with no selected rows. + type Output: HasSelectionVector; + + /// Processes a batch of actions and returns the filtered results. + /// + /// # Arguments + /// - `actions_batch` - A reference to an [`EngineData`] instance representing a batch of actions. + /// - `is_log_batch` - `true` if the batch originates from a commit log, `false` if from a checkpoint. + /// + /// Returns a [`DeltaResult`] containing the processorโ€™s output, which includes only selected actions. + /// + /// Note: Since log replay is stateful, processing may update internal processor state (e.g., deduplication sets). + fn process_actions_batch( + &mut self, + actions_batch: &dyn EngineData, + is_log_batch: bool, + ) -> DeltaResult; + + /// Applies the processor to an actions iterator and filters out empty results. + /// + /// # Arguments + /// * `action_iter` - Iterator of action batches and their source flags + /// + /// Returns an iterator that yields the Output type of the processor. + fn process_actions_iter( + mut self, + action_iter: impl Iterator, bool)>>, + ) -> impl Iterator> { + action_iter + .map(move |action_res| { + let (batch, is_log_batch) = action_res?; + self.process_actions_batch(batch.as_ref(), is_log_batch) + }) + .filter(|res| { + // TODO: Leverage .is_none_or() when msrv = 1.82 + res.as_ref() + .map_or(true, |result| result.has_selected_rows()) + }) + } + + /// Builds the initial selection vector for the action batch, used to filter out rows that + /// are not relevant to the current processor's purpose (e.g., checkpointing, scanning). + /// This method performs a first pass of filtering using an optional [`DataSkippingFilter`]. + /// If no filter is provided, it assumes that all rows should be selected. + /// + /// The selection vector is further updated based on the processor's logic in the + /// `process_actions_batch` method. + /// + /// # Arguments + /// - `batch` - A reference to the batch of actions to be processed. + /// + /// # Returns + /// A `DeltaResult>`, where each boolean indicates if the corresponding row should be included. + /// If no filter is provided, all rows are selected. 
+ fn build_selection_vector(&self, batch: &dyn EngineData) -> DeltaResult> { + match self.data_skipping_filter() { + Some(filter) => filter.apply(batch), + None => Ok(vec![true; batch.len()]), // If no filter is provided, select all rows + } + } + + /// Returns an optional reference to the [`DataSkippingFilter`] used to filter rows + /// when building the initial selection vector in `build_selection_vector`. + /// If `None` is returned, no filter is applied, and all rows are selected. + fn data_skipping_filter(&self) -> Option<&DataSkippingFilter>; +} + +/// This trait is used to determine if a processor's output contains any selected rows. +/// This is used to filter out batches with no selected rows from the log replay results. +pub(crate) trait HasSelectionVector { + /// Check if the selection vector contains at least one selected row + fn has_selected_rows(&self) -> bool; +} diff --git a/kernel/src/log_segment.rs b/kernel/src/log_segment.rs index b4f255c57..e55e1791a 100644 --- a/kernel/src/log_segment.rs +++ b/kernel/src/log_segment.rs @@ -1,13 +1,18 @@ //! Represents a segment of a delta log. [`LogSegment`] wraps a set of checkpoint and commit //! files. -use crate::actions::{get_log_schema, Metadata, Protocol, METADATA_NAME, PROTOCOL_NAME}; +use crate::actions::visitors::SidecarVisitor; +use crate::actions::{ + get_log_schema, Metadata, Protocol, ADD_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; use crate::path::{LogPathFileType, ParsedLogPath}; use crate::schema::SchemaRef; -use crate::snapshot::CheckpointMetadata; +use crate::snapshot::LastCheckpointHint; use crate::utils::require; use crate::{ - DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, FileSystemClient, Version, + DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, ParquetHandler, RowVisitor, + StorageHandler, Version, }; use itertools::Itertools; use std::collections::HashMap; @@ -32,10 +37,11 @@ mod tests; /// and in `TableChanges` when built with [`LogSegment::for_table_changes`]. /// /// [`Snapshot`]: crate::snapshot::Snapshot -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct LogSegment { pub end_version: Version, + pub checkpoint_version: Option, pub log_root: Url, /// Sorted commit files in the log segment (ascending) pub ascending_commit_files: Vec, @@ -44,12 +50,18 @@ pub(crate) struct LogSegment { } impl LogSegment { - fn try_new( - ascending_commit_files: Vec, + pub(crate) fn try_new( + mut ascending_commit_files: Vec, checkpoint_parts: Vec, log_root: Url, end_version: Option, ) -> DeltaResult { + // Commit file versions must be greater than the most recent checkpoint version if it exists + let checkpoint_version = checkpoint_parts.first().map(|checkpoint_file| { + ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); + checkpoint_file.version + }); + // We require that commits that are contiguous. In other words, there must be no gap between commit versions. require!( ascending_commit_files @@ -63,35 +75,37 @@ impl LogSegment { // There must be no gap between a checkpoint and the first commit version. Note that // that all checkpoint parts share the same version. 
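To make the `LogReplayProcessor` and `HasSelectionVector` contract concrete, here is a deliberately minimal processor that selects every row and applies no data skipping. It is an illustrative sketch against the trait signatures in this patch, not code the patch adds.

```rust
use crate::log_replay::{HasSelectionVector, LogReplayProcessor};
use crate::scan::data_skipping::DataSkippingFilter;
use crate::{DeltaResult, EngineData};

struct SelectAllProcessor;

struct SelectAllOutput {
    selection_vector: Vec<bool>,
}

impl HasSelectionVector for SelectAllOutput {
    fn has_selected_rows(&self) -> bool {
        self.selection_vector.contains(&true)
    }
}

impl LogReplayProcessor for SelectAllProcessor {
    type Output = SelectAllOutput;

    fn process_actions_batch(
        &mut self,
        actions_batch: &dyn EngineData,
        _is_log_batch: bool,
    ) -> DeltaResult<Self::Output> {
        // With no data-skipping filter (see below), this is vec![true; actions_batch.len()].
        let selection_vector = self.build_selection_vector(actions_batch)?;
        Ok(SelectAllOutput { selection_vector })
    }

    fn data_skipping_filter(&self) -> Option<&DataSkippingFilter> {
        None
    }
}
```

A real processor would also embed a `FileActionDeduplicator`; its behavior amounts to "first sighting wins", as this small sketch shows (the getter index arguments are illustrative and depend on the visitor's column layout).

```rust
use std::collections::HashSet;

use crate::log_replay::{FileActionDeduplicator, FileActionKey};

fn dedup_sketch() {
    let mut seen = HashSet::new();
    let mut dedup = FileActionDeduplicator::new(&mut seen, /*is_log_batch*/ true, 0, 4, 1, 5);
    // First sighting: not seen yet, so the action is processed and recorded.
    assert!(!dedup.check_and_record_seen(FileActionKey::new("part-00001.parquet", None)));
    // Same (path, dv id) in an older batch: recognized as a duplicate and ignored.
    assert!(dedup.check_and_record_seen(FileActionKey::new("part-00001.parquet", None)));
}
```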
- if let (Some(checkpoint_file), Some(commit_file)) = - (checkpoint_parts.first(), ascending_commit_files.first()) + if let (Some(checkpoint_version), Some(commit_file)) = + (checkpoint_version, ascending_commit_files.first()) { require!( - checkpoint_file.version + 1 == commit_file.version, + checkpoint_version + 1 == commit_file.version, Error::InvalidCheckpoint(format!( "Gap between checkpoint version {} and next commit {}", - checkpoint_file.version, commit_file.version, + checkpoint_version, commit_file.version, )) ) } // Get the effective version from chosen files - let version_eff = ascending_commit_files + let effective_version = ascending_commit_files .last() .or(checkpoint_parts.first()) .ok_or(Error::generic("No files in log segment"))? .version; if let Some(end_version) = end_version { require!( - version_eff == end_version, + effective_version == end_version, Error::generic(format!( "LogSegment end version {} not the same as the specified end version {}", - version_eff, end_version + effective_version, end_version )) ); } + Ok(LogSegment { - end_version: version_eff, + end_version: effective_version, + checkpoint_version, log_root, ascending_commit_files, checkpoint_parts, @@ -104,35 +118,28 @@ impl LogSegment { /// parts. All these parts will have the same checkpoint version. /// /// The options for constructing a LogSegment for Snapshot are as follows: - /// - `checkpoint_hint`: a `CheckpointMetadata` to start the log segment from (e.g. from reading the `last_checkpoint` file). + /// - `checkpoint_hint`: a `LastCheckpointHint` to start the log segment from (e.g. from reading the `last_checkpoint` file). /// - `time_travel_version`: The version of the log that the Snapshot will be at. /// /// [`Snapshot`]: crate::snapshot::Snapshot #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_snapshot( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: Url, - checkpoint_hint: impl Into>, + checkpoint_hint: impl Into>, time_travel_version: impl Into>, ) -> DeltaResult { let time_travel_version = time_travel_version.into(); - let (mut ascending_commit_files, checkpoint_parts) = + let (ascending_commit_files, checkpoint_parts) = match (checkpoint_hint.into(), time_travel_version) { - (Some(cp), None) => { - list_log_files_with_checkpoint(&cp, fs_client, &log_root, None)? - } + (Some(cp), None) => list_log_files_with_checkpoint(&cp, storage, &log_root, None)?, (Some(cp), Some(end_version)) if cp.version <= end_version => { - list_log_files_with_checkpoint(&cp, fs_client, &log_root, Some(end_version))? + list_log_files_with_checkpoint(&cp, storage, &log_root, Some(end_version))? } - _ => list_log_files_with_version(fs_client, &log_root, None, time_travel_version)?, + _ => list_log_files_with_version(storage, &log_root, None, time_travel_version)?, }; - // Commit file versions must be greater than the most recent checkpoint version if it exists - if let Some(checkpoint_file) = checkpoint_parts.first() { - ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); - } - LogSegment::try_new( ascending_commit_files, checkpoint_parts, @@ -147,7 +154,7 @@ impl LogSegment { /// is specified it will be the most recent version by default. 
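The commit-retention and contiguity rules now enforced inside `try_new` are easiest to see with concrete version numbers. A plain-Rust sketch, with kernel types omitted:

```rust
fn main() {
    // Checkpoint at version 5; the listing returned commits 3 through 8.
    let checkpoint_version = Some(5u64);
    let mut commit_versions = vec![3u64, 4, 5, 6, 7, 8];

    // Keep only commits strictly newer than the checkpoint (the retain in try_new).
    if let Some(cp) = checkpoint_version {
        commit_versions.retain(|v| cp < *v);
    }
    assert_eq!(commit_versions, vec![6, 7, 8]);

    // Commits must be contiguous (no version gaps)...
    assert!(commit_versions.windows(2).all(|w| w[0] + 1 == w[1]));
    // ...and the first retained commit must directly follow the checkpoint.
    assert_eq!(commit_versions.first(), Some(&6));
    // The effective end version is the newest commit (or the checkpoint if there are none).
    assert_eq!(*commit_versions.last().unwrap(), 8);
}
```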
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_table_changes( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: Url, start_version: Version, end_version: impl Into>, @@ -162,7 +169,7 @@ impl LogSegment { } let ascending_commit_files: Vec<_> = - list_log_files(fs_client, &log_root, start_version, end_version)? + list_log_files(storage, &log_root, start_version, end_version)? .filter_ok(|x| x.is_commit()) .try_collect()?; @@ -181,19 +188,21 @@ impl LogSegment { ); LogSegment::try_new(ascending_commit_files, vec![], log_root, end_version) } - /// Read a stream of log data from this log segment. + + /// Read a stream of actions from this log segment. This returns an iterator of (EngineData, + /// bool) pairs, where the boolean flag indicates whether the data was read from a commit file + /// (true) or a checkpoint file (false). /// /// The log files will be read from most recent to oldest. - /// The boolean flags indicates whether the data was read from - /// a commit file (true) or a checkpoint file (false). /// - /// `read_schema` is the schema to read the log files with. This can be used - /// to project the log files to a subset of the columns. + /// `commit_read_schema` is the (physical) schema to read the commit files with, and + /// `checkpoint_read_schema` is the (physical) schema to read checkpoint files with. This can be + /// used to project the log files to a subset of the columns. /// /// `meta_predicate` is an optional expression to filter the log files with. It is _NOT_ the /// query's predicate, but rather a predicate for filtering log files themselves. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn replay( + pub(crate) fn read_actions( &self, engine: &dyn Engine, commit_read_schema: SchemaRef, @@ -209,25 +218,158 @@ impl LogSegment { .map(|f| f.location.clone()) .collect(); let commit_stream = engine - .get_json_handler() + .json_handler() .read_json_files(&commit_files, commit_read_schema, meta_predicate.clone())? .map_ok(|batch| (batch, true)); - let checkpoint_parts: Vec<_> = self + let checkpoint_stream = + self.create_checkpoint_stream(engine, checkpoint_read_schema, meta_predicate)?; + + Ok(commit_stream.chain(checkpoint_stream)) + } + + /// Returns an iterator over checkpoint data, processing sidecar files when necessary. + /// + /// By default, `create_checkpoint_stream` checks for the presence of sidecar files, and + /// reads their contents if present. Checking for sidecar files is skipped if: + /// - The checkpoint is a multi-part checkpoint + /// - The checkpoint read schema does not contain a file action + /// + /// For single-part checkpoints, any referenced sidecar files are processed. These + /// sidecar files contain the actual file actions that would otherwise be + /// stored directly in the checkpoint. The sidecar file batches are chained to the + /// checkpoint batch in the top level iterator to be returned. 
+ fn create_checkpoint_stream( + &self, + engine: &dyn Engine, + checkpoint_read_schema: SchemaRef, + meta_predicate: Option, + ) -> DeltaResult, bool)>> + Send> { + let need_file_actions = checkpoint_read_schema.contains(ADD_NAME) + || checkpoint_read_schema.contains(REMOVE_NAME); + require!( + !need_file_actions || checkpoint_read_schema.contains(SIDECAR_NAME), + Error::invalid_checkpoint( + "If the checkpoint read schema contains file actions, it must contain the sidecar column" + ) + ); + + let checkpoint_file_meta: Vec<_> = self .checkpoint_parts .iter() .map(|f| f.location.clone()) .collect(); - let checkpoint_stream = engine - .get_parquet_handler() - .read_parquet_files(&checkpoint_parts, checkpoint_read_schema, meta_predicate)? - .map_ok(|batch| (batch, false)); - Ok(commit_stream.chain(checkpoint_stream)) + let parquet_handler = engine.parquet_handler(); + + // Historically, we had a shared file reader trait for JSON and Parquet handlers, + // but it was removed to avoid unnecessary coupling. This is a concrete case + // where it *could* have been useful, but for now, we're keeping them separate. + // If similar patterns start appearing elsewhere, we should reconsider that decision. + let actions = match self.checkpoint_parts.first() { + Some(parsed_log_path) if parsed_log_path.extension == "json" => { + engine.json_handler().read_json_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? + } + Some(parsed_log_path) if parsed_log_path.extension == "parquet" => parquet_handler + .read_parquet_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )?, + Some(parsed_log_path) => { + return Err(Error::generic(format!( + "Unsupported checkpoint file type: {}", + parsed_log_path.extension, + ))); + } + // This is the case when there are no checkpoints in the log segment + // so we return an empty iterator + None => Box::new(std::iter::empty()), + }; + + let log_root = self.log_root.clone(); + + let actions_iter = actions + .map(move |checkpoint_batch_result| -> DeltaResult<_> { + let checkpoint_batch = checkpoint_batch_result?; + // This closure maps the checkpoint batch to an iterator of batches + // by chaining the checkpoint batch with sidecar batches if they exist. + + // 1. In the case where the schema does not contain file actions, we return the + // checkpoint batch directly as sidecar files only have to be read when the + // schema contains add/remove action. + // 2. Multi-part checkpoint batches never have sidecar actions, so the batch is + // returned as-is. + let sidecar_content = if need_file_actions && checkpoint_file_meta.len() == 1 { + Self::process_sidecars( + parquet_handler.clone(), // cheap Arc clone + log_root.clone(), + checkpoint_batch.as_ref(), + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? + } else { + None + }; + + let combined_batches = std::iter::once(Ok(checkpoint_batch)) + .chain(sidecar_content.into_iter().flatten()) + // The boolean flag indicates whether the batch originated from a commit file + // (true) or a checkpoint file (false). + .map_ok(|sidecar_batch| (sidecar_batch, false)); + + Ok(combined_batches) + }) + .flatten_ok() + .map(|result| result?); // result-result to result + + Ok(actions_iter) } - // Get the most up-to-date Protocol and Metadata actions - pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> { + /// Processes sidecar files for the given checkpoint batch. 
+    ///
+    /// This function extracts any sidecar file references from the provided batch.
+    /// Each sidecar file is read and an iterator of file action batches is returned.
+    fn process_sidecars(
+        parquet_handler: Arc<dyn ParquetHandler>,
+        log_root: Url,
+        batch: &dyn EngineData,
+        checkpoint_read_schema: SchemaRef,
+        meta_predicate: Option<ExpressionRef>,
+    ) -> DeltaResult<Option<impl Iterator<Item = DeltaResult<Box<dyn EngineData>>> + Send>> {
+        // Visit the rows of the checkpoint batch to extract sidecar file references
+        let mut visitor = SidecarVisitor::default();
+        visitor.visit_rows_of(batch)?;
+
+        // If there are no sidecar files, return early
+        if visitor.sidecars.is_empty() {
+            return Ok(None);
+        }
+
+        let sidecar_files: Vec<_> = visitor
+            .sidecars
+            .iter()
+            .map(|sidecar| sidecar.to_filemeta(&log_root))
+            .try_collect()?;
+
+        // Read the sidecar files and return an iterator of sidecar file batches
+        Ok(Some(parquet_handler.read_parquet_files(
+            &sidecar_files,
+            checkpoint_read_schema,
+            meta_predicate,
+        )?))
+    }
+
+    // Do a lightweight protocol+metadata log replay to find the latest Protocol and Metadata in
+    // the LogSegment
+    pub(crate) fn protocol_and_metadata(
+        &self,
+        engine: &dyn Engine,
+    ) -> DeltaResult<(Option<Metadata>, Option<Protocol>)> {
         let data_batches = self.replay_for_metadata(engine)?;
         let (mut metadata_opt, mut protocol_opt) = (None, None);
         for batch in data_batches {
@@ -243,7 +385,12 @@ impl LogSegment {
                 break;
             }
         }
-        match (metadata_opt, protocol_opt) {
+        Ok((metadata_opt, protocol_opt))
+    }
+
+    // Get the most up-to-date Protocol and Metadata actions
+    pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> {
+        match self.protocol_and_metadata(engine)? {
             (Some(m), Some(p)) => Ok((m, p)),
             (None, Some(_)) => Err(Error::MissingMetadata),
             (Some(_), None) => Err(Error::MissingProtocol),
@@ -265,7 +412,7 @@ impl LogSegment {
             )))
         });
         // read the same protocol and metadata schema for both commits and checkpoints
-        self.replay(engine, schema.clone(), schema, META_PREDICATE.clone())
+        self.read_actions(engine, schema.clone(), schema, META_PREDICATE.clone())
     }
 }
@@ -274,9 +421,9 @@ impl LogSegment {
 /// not specified, the files will begin from version number 0. If `end_version` is not specified, files up to
 /// the most recent version will be included.
 ///
-/// Note: this calls [`FileSystemClient::list_from`] to get the list of log files.
+/// Note: this calls [`StorageHandler::list_from`] to get the list of log files.
 fn list_log_files(
-    fs_client: &dyn FileSystemClient,
+    storage: &dyn StorageHandler,
     log_root: &Url,
     start_version: impl Into<Option<Version>>,
     end_version: impl Into<Option<Version>>,
@@ -286,7 +433,7 @@ fn list_log_files(
     let version_prefix = format!("{:020}", start_version);
     let start_from = log_root.join(&version_prefix)?;
-    Ok(fs_client
+    Ok(storage
         .list_from(&start_from)?
         .map(|meta| ParsedLogPath::try_from(meta?))
         // TODO this filters out .crc files etc which start with "." - how do we want to use these kind of files?
@@ -296,13 +443,17 @@ fn list_log_files(
             Err(_) => true,
         }))
 }
+
 /// List all commit and checkpoint files with versions above the provided `start_version` (inclusive).
 /// If successful, this returns a tuple `(ascending_commit_files, checkpoint_parts)` of type
 /// `(Vec<ParsedLogPath>, Vec<ParsedLogPath>)`. The commit files are guaranteed to be sorted in
 /// ascending order by version. The elements of `checkpoint_parts` are all the parts of the same
 /// checkpoint. Checkpoint parts share the same version.
-fn list_log_files_with_version(
-    fs_client: &dyn FileSystemClient,
+// TODO: encode some of these guarantees in the output types. e.g.
we could have: +// - SortedCommitFiles: Vec, is_ascending: bool, end_version: Version +// - CheckpointParts: Vec, checkpoint_version: Version (guarantee all same version) +pub(crate) fn list_log_files_with_version( + storage: &dyn StorageHandler, log_root: &Url, start_version: Option, end_version: Option, @@ -310,7 +461,7 @@ fn list_log_files_with_version( // We expect 10 commit files per checkpoint, so start with that size. We could adjust this based // on config at some point - let log_files = list_log_files(fs_client, log_root, start_version, end_version)?; + let log_files = list_log_files(storage, log_root, start_version, end_version)?; log_files.process_results(|iter| { let mut commit_files = Vec::with_capacity(10); @@ -399,13 +550,13 @@ fn group_checkpoint_parts(parts: Vec) -> HashMap, ) -> DeltaResult<(Vec, Vec)> { let (commit_files, checkpoint_parts) = list_log_files_with_version( - fs_client, + storage, log_root, Some(checkpoint_metadata.version), end_version, diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 5db1c4581..f94b7b736 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -1,15 +1,33 @@ +use std::sync::LazyLock; use std::{path::PathBuf, sync::Arc}; +use futures::executor::block_on; use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use url::Url; +use crate::actions::visitors::AddVisitor; +use crate::actions::{ + get_log_add_schema, get_log_schema, Add, Sidecar, ADD_NAME, METADATA_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::executor::tokio::TokioBackgroundExecutor; -use crate::engine::default::filesystem::ObjectStoreFileSystemClient; +use crate::engine::default::filesystem::ObjectStoreStorageHandler; +use crate::engine::default::DefaultEngine; use crate::engine::sync::SyncEngine; use crate::log_segment::LogSegment; -use crate::snapshot::CheckpointMetadata; -use crate::{FileSystemClient, Table}; +use crate::parquet::arrow::ArrowWriter; +use crate::path::ParsedLogPath; +use crate::scan::test_utils::{ + add_batch_simple, add_batch_with_remove, sidecar_batch_with_given_paths, +}; +use crate::snapshot::LastCheckpointHint; +use crate::utils::test_utils::{assert_batch_matches, Action}; +use crate::{ + DeltaResult, Engine, EngineData, Expression, ExpressionRef, FileMeta, RowVisitor, + StorageHandler, Table, +}; use test_utils::delta_path_for_version; // NOTE: In addition to testing the meta-predicate for metadata replay, this test also verifies @@ -63,53 +81,137 @@ fn delta_path_for_multipart_checkpoint(version: u64, part_num: u32, num_parts: u } // Utility method to build a log using a list of log paths and an optional checkpoint hint. The -// CheckpointMetadata is written to `_delta_log/_last_checkpoint`. +// LastCheckpointHint is written to `_delta_log/_last_checkpoint`. 
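+// For reference, the hint is written as a single JSON object; for
+// `LastCheckpointHint { version: 5, size: 10, parts: Some(3), .. }` the file contents look
+// roughly like `{"version":5,"size":10,"parts":3}` (optional fields omitted or null).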
fn build_log_with_paths_and_checkpoint( paths: &[Path], - checkpoint_metadata: Option<&CheckpointMetadata>, -) -> (Box, Url) { + checkpoint_metadata: Option<&LastCheckpointHint>, +) -> (Box, Url) { let store = Arc::new(InMemory::new()); let data = bytes::Bytes::from("kernel-data"); // add log files to store - tokio::runtime::Runtime::new() - .expect("create tokio runtime") - .block_on(async { - for path in paths { - store - .put(path, data.clone().into()) - .await - .expect("put log file in store"); - } - if let Some(checkpoint_metadata) = checkpoint_metadata { - let checkpoint_str = - serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); - store - .put( - &Path::from("_delta_log/_last_checkpoint"), - checkpoint_str.into(), - ) - .await - .expect("Write _last_checkpoint"); - } - }); - - let client = ObjectStoreFileSystemClient::new( + block_on(async { + for path in paths { + store + .put(path, data.clone().into()) + .await + .expect("put log file in store"); + } + if let Some(checkpoint_metadata) = checkpoint_metadata { + let checkpoint_str = + serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); + store + .put( + &Path::from("_delta_log/_last_checkpoint"), + checkpoint_str.into(), + ) + .await + .expect("Write _last_checkpoint"); + } + }); + + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); let table_root = Url::parse("memory:///").expect("valid url"); let log_root = table_root.join("_delta_log/").unwrap(); - (Box::new(client), log_root) + (Box::new(storage), log_root) +} + +// Create an in-memory store and return the store and the URL for the store's _delta_log directory. +fn new_in_memory_store() -> (Arc, Url) { + ( + Arc::new(InMemory::new()), + Url::parse("memory:///") + .unwrap() + .join("_delta_log/") + .unwrap(), + ) +} + +// Writes a record batch obtained from engine data to the in-memory store at a given path. +fn write_parquet_to_store( + store: &Arc, + path: String, + data: Box, +) -> DeltaResult<()> { + let batch = ArrowEngineData::try_from_engine_data(data)?; + let record_batch = batch.record_batch(); + + let mut buffer = vec![]; + let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), None)?; + writer.write(record_batch)?; + writer.close()?; + + block_on(async { store.put(&Path::from(path), buffer.into()).await })?; + + Ok(()) +} + +/// Writes all actions to a _delta_log parquet checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. +pub(crate) fn add_checkpoint_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log/_sidecars file in the store. +/// This function formats the provided filename into the _sidecars subdirectory. +fn add_sidecar_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/_sidecars/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log json checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. 
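+// (Commit and JSON checkpoint files are newline-delimited JSON: one action object per line,
+// e.g. `{"add":{...}}` on one line and `{"remove":{...}}` on the next; `write_json_to_store`
+// below joins the serialized actions with '\n' accordingly.)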
+fn write_json_to_store( + store: &Arc, + actions: Vec, + filename: &str, +) -> DeltaResult<()> { + let json_lines: Vec = actions + .into_iter() + .map(|action| serde_json::to_string(&action).expect("action to string")) + .collect(); + let content = json_lines.join("\n"); + let checkpoint_path = format!("_delta_log/{}", filename); + + tokio::runtime::Runtime::new() + .expect("create tokio runtime") + .block_on(async { + store + .put(&Path::from(checkpoint_path), content.into()) + .await + })?; + + Ok(()) +} + +fn create_log_path(path: &str) -> ParsedLogPath { + ParsedLogPath::try_from(FileMeta { + location: Url::parse(path).expect("Invalid file URL"), + last_modified: 0, + size: 0, + }) + .unwrap() + .unwrap() } #[test] -fn build_snapshot_with_unsupported_uuid_checkpoint() { - let (client, log_root) = build_log_with_paths_and_checkpoint( +fn build_snapshot_with_uuid_checkpoint_parquet() { + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -124,21 +226,90 @@ fn build_snapshot_with_unsupported_uuid_checkpoint() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; assert_eq!(checkpoint_parts.len(), 1); - assert_eq!(checkpoint_parts[0].version, 3); + assert_eq!(checkpoint_parts[0].version, 5); let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); - let expected_versions = vec![4, 5, 6, 7]; + let expected_versions = vec![6, 7]; + assert_eq!(versions, expected_versions); +} + +#[test] +fn build_snapshot_with_uuid_checkpoint_json() { + let (storage, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json"), + delta_path_for_version(6, "json"), + delta_path_for_version(7, "json"), + ], + None, + ); + + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(checkpoint_parts[0].version, 5); + + let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); + let expected_versions = vec![6, 7]; assert_eq!(versions, expected_versions); } +#[test] +fn build_snapshot_with_correct_last_uuid_checkpoint() { + let checkpoint_metadata = LastCheckpointHint { + version: 5, + size: 10, + parts: Some(1), + size_in_bytes: None, + num_of_add_files: None, + checkpoint_schema: None, + checksum: None, + }; + + let (storage, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(1, "json"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(3, "json"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet"), + delta_path_for_version(5, "json"), + delta_path_for_version(6, 
"json"), + delta_path_for_version(7, "json"), + ], + Some(&checkpoint_metadata), + ); + + let log_segment = + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(commit_files.len(), 2); + assert_eq!(checkpoint_parts[0].version, 5); + assert_eq!(commit_files[0].version, 6); + assert_eq!(commit_files[1].version, 7); +} #[test] fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_multipart_checkpoint(1, 1, 3), @@ -162,7 +333,7 @@ fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -176,7 +347,7 @@ fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { #[test] fn build_snapshot_with_out_of_date_last_checkpoint() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -186,7 +357,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -201,7 +372,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -213,7 +384,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { } #[test] fn build_snapshot_with_correct_last_multipart_checkpoint() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(3), @@ -223,7 +394,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -243,7 +414,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -256,7 +427,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { #[test] fn build_snapshot_with_missing_checkpoint_part_from_hint_fails() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(3), @@ -266,7 +437,7 @@ fn 
build_snapshot_with_missing_checkpoint_part_from_hint_fails() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -286,12 +457,12 @@ fn build_snapshot_with_missing_checkpoint_part_from_hint_fails() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None); assert!(log_segment.is_err()) } #[test] fn build_snapshot_with_bad_checkpoint_hint_fails() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(1), @@ -301,7 +472,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -320,7 +491,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None); assert!(log_segment.is_err()) } @@ -328,7 +499,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { fn build_snapshot_with_missing_checkpoint_part_no_hint() { // Part 2 of 3 is missing from checkpoint 5. The Snapshot should be made of checkpoint // number 3 and commit files 4 to 7. - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -347,7 +518,7 @@ fn build_snapshot_with_missing_checkpoint_part_no_hint() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -365,7 +536,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo // When the _last_checkpoint is out of date and the most recent checkpoint is incomplete, the // Snapshot should be made of the most recent complete checkpoint and the commit files that // follow it. 
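     // (A multi-part checkpoint is only usable when all of its `parts` files are present; an
     // incomplete one is skipped and replay falls back to the newest complete checkpoint.)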
- let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -375,7 +546,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -393,7 +564,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -407,7 +578,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo #[test] fn build_snapshot_without_checkpoints() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -426,7 +597,7 @@ fn build_snapshot_without_checkpoints() { ///////// Specify no checkpoint or end version ///////// let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root.clone(), None, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root.clone(), None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -439,7 +610,7 @@ fn build_snapshot_without_checkpoints() { assert_eq!(versions, expected_versions); ///////// Specify only end version ///////// - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, Some(2)).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, Some(2)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -454,7 +625,7 @@ fn build_snapshot_without_checkpoints() { #[test] fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: None, @@ -463,7 +634,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { checkpoint_schema: None, checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -481,7 +652,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -494,7 +665,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { #[test] fn build_snapshot_with_start_checkpoint_and_time_travel_version() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -504,7 +675,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { checksum: None, }; - let (client, log_root) = 
build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -519,7 +690,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); assert_eq!(log_segment.checkpoint_parts[0].version, 3); assert_eq!(log_segment.ascending_commit_files.len(), 1); @@ -527,7 +698,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { } #[test] fn build_table_changes_with_commit_versions() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -547,7 +718,7 @@ fn build_table_changes_with_commit_versions() { ///////// Specify start version and end version ///////// let log_segment = - LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 2, 5).unwrap(); + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 2, 5).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -561,7 +732,7 @@ fn build_table_changes_with_commit_versions() { ///////// Start version and end version are the same ///////// let log_segment = - LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 0, Some(0)).unwrap(); + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 0, Some(0)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -573,7 +744,7 @@ fn build_table_changes_with_commit_versions() { assert_eq!(commit_files[0].version, 0); ///////// Specify no start or end version ///////// - let log_segment = LogSegment::for_table_changes(client.as_ref(), log_root, 0, None).unwrap(); + let log_segment = LogSegment::for_table_changes(storage.as_ref(), log_root, 0, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -589,7 +760,7 @@ fn build_table_changes_with_commit_versions() { #[test] fn test_non_contiguous_log() { // Commit with version 1 is missing - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(2, "json"), @@ -597,26 +768,494 @@ fn test_non_contiguous_log() { None, ); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 0, None); + let log_segment_res = + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 0, None); assert!(log_segment_res.is_err()); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 1, None); + let log_segment_res = + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 1, None); assert!(log_segment_res.is_err()); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root, 0, Some(1)); + let log_segment_res = LogSegment::for_table_changes(storage.as_ref(), log_root, 0, Some(1)); assert!(log_segment_res.is_err()); } #[test] fn table_changes_fails_with_larger_start_version_than_end() { // Commit with version 1 is missing - let (client, log_root) = build_log_with_paths_and_checkpoint( + let 
(storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), ], None, ); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root, 1, Some(0)); + let log_segment_res = LogSegment::for_table_changes(storage.as_ref(), log_root, 1, Some(0)); assert!(log_segment_res.is_err()); } +#[test] +fn test_sidecar_to_filemeta_valid_paths() -> DeltaResult<()> { + let log_root = Url::parse("file:///var/_delta_log/")?; + let test_cases = [ + ( + "example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "file:///var/_delta_log/_sidecars/example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "test/test/example.parquet", + "file:///var/_delta_log/_sidecars/test/test/example.parquet", + ), + ]; + + for (input_path, expected_url) in test_cases.into_iter() { + let sidecar = Sidecar { + path: expected_url.to_string(), + modification_time: 0, + size_in_bytes: 1000, + tags: None, + }; + + let filemeta = sidecar.to_filemeta(&log_root)?; + assert_eq!( + filemeta.location.as_str(), + expected_url, + "Mismatch for input path: {}", + input_path + ); + } + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_no_sidecars_returns_none() -> DeltaResult<()> { + let (_, log_root) = new_in_memory_store(); + let engine = Arc::new(SyncEngine::new()); + let checkpoint_batch = add_batch_simple(get_log_schema().clone()); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? + .into_iter() + .flatten(); + + // Assert no batches are returned + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecars_returns_sidecar_batches() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(read_schema.clone()), + "sidecarfile2.parquet", + )?; + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + read_schema.clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + None, + )? + .into_iter() + .flatten(); + + // Assert the correctness of batches returned + assert_batch_matches(iter.next().unwrap()?, add_batch_simple(read_schema.clone())); + assert_batch_matches(iter.next().unwrap()?, add_batch_with_remove(read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecar_files_that_do_not_exist() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? 
+ .into_iter() + .flatten(); + + // Assert that an error is returned when trying to read sidecar files that do not exist + let err = iter.next().unwrap(); + assert!(err.is_err()); + + Ok(()) +} + +#[test] +fn test_reading_sidecar_files_with_predicate() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + let checkpoint_batch = + sidecar_batch_with_given_paths(vec!["sidecarfile1.parquet"], read_schema.clone()); + + // Add a sidecar file with only add actions + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + + // Filter out sidecar files that do not contain remove actions + let remove_predicate: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + remove_predicate.clone(), + )? + .into_iter() + .flatten(); + + // As the sidecar batch contains only add actions, the batch should be filtered out + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_remove_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream( + &engine, + get_log_schema().project(&[REMOVE_NAME])?, + None, + ); + + // Errors because the schema has an REMOVE action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_add_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream(&engine, get_log_add_schema().clone(), None); + + // Errors because the schema has an ADD action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_no_file_actions( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + add_checkpoint_to_store( + &store, + // Create a checkpoint batch with sidecar actions to verify that the sidecar actions are not read. + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[METADATA_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], v2_checkpoint_read_schema), + ); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_multi_part( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + // Multi-part checkpoints should never contain sidecar actions. + // This test intentionally includes batches with sidecar actions in multi-part checkpoints + // to verify that the reader does not process them. Instead, the reader should short-circuit + // and return the checkpoint batches as-is when encountering a multi-part checkpoint. + // Note: This is a test-only scenario; real tables should never have multi-part + // checkpoints with sidecar actions. + let checkpoint_part_1 = "00000000000000000001.checkpoint.0000000001.0000000002.parquet"; + let checkpoint_part_2 = "00000000000000000001.checkpoint.0000000002.0000000002.parquet"; + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + checkpoint_part_1, + )?; + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar2.parquet"], get_log_schema().clone()), + checkpoint_part_2, + )?; + + let checkpoint_one_file = log_root.join(checkpoint_part_1)?.to_string(); + let checkpoint_two_file = log_root.join(checkpoint_part_2)?.to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![ + create_log_path(&checkpoint_one_file), + create_log_path(&checkpoint_two_file), + ], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert the correctness of batches returned + for expected_sidecar in ["sidecar1.parquet", "sidecar2.parquet"].iter() { + let (batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + batch, + sidecar_batch_with_given_paths( + vec![expected_sidecar], + v2_checkpoint_read_schema.clone(), + ), + ); + } + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars() -> DeltaResult<()> +{ + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + add_checkpoint_to_store( + &store, + add_batch_simple(get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches(first_batch, add_batch_simple(v2_checkpoint_read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + write_json_to_store( + &store, + vec![Action::Add(Add { + path: "fake_path_1".into(), + data_change: true, + ..Default::default() + })], + "00000000000000000001.checkpoint.json", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.json")? + .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema, None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + let mut visitor = AddVisitor::default(); + visitor.visit_rows_of(&*first_batch)?; + assert!(visitor.adds.len() == 1); + assert!(visitor.adds[0].path == "fake_path_1"); + + assert!(iter.next().is_none()); + + Ok(()) +} + +// Tests the end-to-end process of creating a checkpoint stream. +// Verifies that: +// - The checkpoint file is read and produces batches containing references to sidecar files. +// - As sidecar references are present, the corresponding sidecar files are processed correctly. +// - Batches from both the checkpoint file and sidecar files are returned. +// - Each returned batch is correctly flagged with is_log_batch set to false +#[test] +fn test_create_checkpoint_stream_reads_checkpoint_file_and_returns_sidecar_batches( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ), + "00000000000000000001.checkpoint.parquet", + )?; + + add_sidecar_to_store( + &store, + add_batch_simple(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile2.parquet", + )?; + + let checkpoint_file_path = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_file_path)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?, + ), + ); + // Assert that the second batch returned is from reading sidecarfile1 + let (second_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + second_batch, + add_batch_simple(v2_checkpoint_read_schema.clone()), + ); + + // Assert that the second batch returned is from reading sidecarfile2 + let (third_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + third_batch, + add_batch_with_remove(v2_checkpoint_read_schema), + ); + + assert!(iter.next().is_none()); + + Ok(()) +} diff --git a/kernel/src/parquet.rs b/kernel/src/parquet.rs new file mode 100644 index 000000000..362079290 --- /dev/null +++ b/kernel/src/parquet.rs @@ -0,0 +1,17 @@ +//! This module exists to help re-export the version of arrow used by default-engine and other +//! parts of kernel that need arrow + +#[cfg(feature = "arrow_53")] +pub use parquet_53::*; + +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] +pub use parquet_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/kernel/src/path.rs b/kernel/src/path.rs index 23e7819de..e2533d777 100644 --- a/kernel/src/path.rs +++ b/kernel/src/path.rs @@ -2,6 +2,7 @@ use std::str::FromStr; use url::Url; +use uuid::Uuid; use crate::{DeltaResult, Error, FileMeta, Version}; @@ -14,7 +15,7 @@ const MULTIPART_PART_LEN: usize = 10; /// The number of characters in the uuid part of a uuid checkpoint const UUID_PART_LEN: usize = 36; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] enum LogPathFileType { @@ -37,7 +38,7 @@ enum LogPathFileType { Unknown, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] struct ParsedLogPath { @@ -88,7 +89,7 @@ impl ParsedLogPath { let filename = url .path_segments() .ok_or_else(|| Error::invalid_log_path(url))? 
- .last() + .next_back() .unwrap() // "the iterator always contains at least one string (which may be empty)" .to_string(); if filename.is_empty() { @@ -163,10 +164,11 @@ impl ParsedLogPath { #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] fn is_checkpoint(&self) -> bool { - // TODO: Include UuidCheckpoint once we actually support v2 checkpoints matches!( self.file_type, - LogPathFileType::SinglePartCheckpoint | LogPathFileType::MultiPartCheckpoint { .. } + LogPathFileType::SinglePartCheckpoint + | LogPathFileType::MultiPartCheckpoint { .. } + | LogPathFileType::UuidCheckpoint(_) ) } @@ -174,24 +176,25 @@ impl ParsedLogPath { #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] #[allow(dead_code)] // currently only used in tests, which don't "count" fn is_unknown(&self) -> bool { - // TODO: Stop treating UuidCheckpoint as unknown once we support v2 checkpoints - matches!( - self.file_type, - LogPathFileType::Unknown | LogPathFileType::UuidCheckpoint(_) - ) + matches!(self.file_type, LogPathFileType::Unknown) } } impl ParsedLogPath { - /// Create a new ParsedCommitPath for a new json commit file at the specified version - pub(crate) fn new_commit( - table_root: &Url, - version: Version, - ) -> DeltaResult> { + const DELTA_LOG_DIR: &'static str = "_delta_log/"; + + /// Helper method to create a path with the given filename generator + fn create_path(table_root: &Url, filename: String) -> DeltaResult { + let location = table_root.join(Self::DELTA_LOG_DIR)?.join(&filename)?; + Self::try_from(location)?.ok_or_else(|| { + Error::internal_error(format!("Attempted to create an invalid path: {}", filename)) + }) + } + + /// Create a new ParsedCommitPath for a new json commit file + pub(crate) fn new_commit(table_root: &Url, version: Version) -> DeltaResult { let filename = format!("{:020}.json", version); - let location = table_root.join("_delta_log/")?.join(&filename)?; - let path = Self::try_from(location)? 
- .ok_or_else(|| Error::internal_error("attempted to create invalid commit path"))?; + let path = Self::create_path(table_root, filename)?; if !path.is_commit() { return Err(Error::internal_error( "ParsedLogPath::new_commit created a non-commit path", @@ -199,6 +202,38 @@ impl ParsedLogPath { } Ok(path) } + + /// Create a new ParsedCheckpointPath for a classic parquet checkpoint file + #[allow(dead_code)] // TODO: Remove this once we have a use case for it + pub(crate) fn new_classic_parquet_checkpoint( + table_root: &Url, + version: Version, + ) -> DeltaResult { + let filename = format!("{:020}.checkpoint.parquet", version); + let path = Self::create_path(table_root, filename)?; + if !path.is_checkpoint() { + return Err(Error::internal_error( + "ParsedLogPath::new_classic_parquet_checkpoint created a non-checkpoint path", + )); + } + Ok(path) + } + + /// Create a new ParsedCheckpointPath for a UUID-based parquet checkpoint file + #[allow(dead_code)] // TODO: Remove this once we have a use case for it + pub(crate) fn new_uuid_parquet_checkpoint( + table_root: &Url, + version: Version, + ) -> DeltaResult { + let filename = format!("{:020}.checkpoint.{}.parquet", version, Uuid::new_v4()); + let path = Self::create_path(table_root, filename)?; + if !path.is_checkpoint() { + return Err(Error::internal_error( + "ParsedLogPath::new_uuid_parquet_checkpoint created a non-checkpoint path", + )); + } + Ok(path) + } } #[cfg(test)] @@ -357,10 +392,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. - assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json") @@ -377,10 +409,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. 
- assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.foo") @@ -575,4 +604,42 @@ mod tests { assert!(matches!(log_path.file_type, LogPathFileType::Commit)); assert_eq!(log_path.filename, "00000000000000000010.json"); } + + #[test] + fn test_new_uuid_parquet_checkpoint() { + let table_log_dir = table_log_dir_url(); + let log_path = ParsedLogPath::new_uuid_parquet_checkpoint(&table_log_dir, 10).unwrap(); + + assert_eq!(log_path.version, 10); + assert!(log_path.is_checkpoint()); + assert_eq!(log_path.extension, "parquet"); + if let LogPathFileType::UuidCheckpoint(uuid) = &log_path.file_type { + assert_eq!(uuid.len(), UUID_PART_LEN); + } else { + panic!("Expected UuidCheckpoint file type"); + } + + let filename = log_path.filename.to_string(); + let filename_parts: Vec<&str> = filename.split('.').collect(); + assert_eq!(filename_parts.len(), 4); + assert_eq!(filename_parts[0], "00000000000000000010"); + assert_eq!(filename_parts[1], "checkpoint"); + assert_eq!(filename_parts[2].len(), UUID_PART_LEN); + assert_eq!(filename_parts[3], "parquet"); + } + + #[test] + fn test_new_classic_parquet_checkpoint() { + let table_log_dir = table_log_dir_url(); + let log_path = ParsedLogPath::new_classic_parquet_checkpoint(&table_log_dir, 10).unwrap(); + + assert_eq!(log_path.version, 10); + assert!(log_path.is_checkpoint()); + assert_eq!(log_path.extension, "parquet"); + assert!(matches!( + log_path.file_type, + LogPathFileType::SinglePartCheckpoint + )); + assert_eq!(log_path.filename, "00000000000000000010.checkpoint.parquet"); + } } diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs index 11181863d..c6897453b 100644 --- a/kernel/src/scan/data_skipping.rs +++ b/kernel/src/scan/data_skipping.rs @@ -11,8 +11,8 @@ use crate::expressions::{ column_expr, joined_column_expr, BinaryOperator, ColumnName, Expression as Expr, ExpressionRef, Scalar, VariadicOperator, }; -use crate::predicates::{ - DataSkippingPredicateEvaluator, PredicateEvaluator, PredicateEvaluatorDefaults, +use crate::kernel_predicates::{ + DataSkippingPredicateEvaluator, KernelPredicateEvaluator, KernelPredicateEvaluatorDefaults, }; use crate::schema::{DataType, PrimitiveType, SchemaRef, SchemaTransform, StructField, StructType}; use crate::{Engine, EngineData, ExpressionEvaluator, JsonHandler, RowVisitor as _}; @@ -33,9 +33,9 @@ mod tests; /// /// The variadic operations are rewritten as follows: /// - `AND` is rewritten as a conjunction of the rewritten operands where we just skip operands that -/// are not eligible for data skipping. +/// are not eligible for data skipping. /// - `OR` is rewritten only if all operands are eligible for data skipping. Otherwise, the whole OR -/// expression is dropped. +/// expression is dropped. #[cfg(test)] fn as_data_skipping_predicate(expr: &Expr) -> Option { DataSkippingPredicateCreator.eval(expr) @@ -75,6 +75,28 @@ impl DataSkippingFilter { let (predicate, referenced_schema) = physical_predicate?; debug!("Creating a data skipping filter for {:#?}", predicate); + // Convert all fields into nullable, as stats may not be available for all columns + // (and usually aren't for partition columns). 
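+        // For example, a referenced (physical) schema of `{ x: INTEGER NOT NULL }` yields a stats
+        // read schema of roughly `{ numRecords: LONG?, nullCount: { x: LONG? },
+        // minValues: { x: INTEGER? }, maxValues: { x: INTEGER? } }` (see stats_schema below).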
+ struct NullableStatsTransform; + impl<'a> SchemaTransform<'a> for NullableStatsTransform { + fn transform_struct_field( + &mut self, + field: &'a StructField, + ) -> Option> { + use Cow::*; + let field = match self.transform(&field.data_type)? { + Borrowed(_) if field.is_nullable() => Borrowed(field), + data_type => Owned(StructField { + name: field.name.clone(), + data_type: data_type.into_owned(), + nullable: true, + metadata: field.metadata.clone(), + }), + }; + Some(field) + } + } + // Convert a min/max stats schema into a nullcount schema (all leaf fields are LONG) struct NullCountStatsTransform; impl<'a> SchemaTransform<'a> for NullCountStatsTransform { @@ -85,14 +107,19 @@ impl DataSkippingFilter { Some(Cow::Owned(PrimitiveType::Long)) } } - let nullcount_schema = NullCountStatsTransform + + let stats_schema = NullableStatsTransform .transform_struct(&referenced_schema)? .into_owned(); + + let nullcount_schema = NullCountStatsTransform + .transform_struct(&stats_schema)? + .into_owned(); let stats_schema = Arc::new(StructType::new([ StructField::nullable("numRecords", DataType::LONG), StructField::nullable("nullCount", nullcount_schema), - StructField::nullable("minValues", referenced_schema.clone()), - StructField::nullable("maxValues", referenced_schema), + StructField::nullable("minValues", stats_schema.clone()), + StructField::nullable("maxValues", stats_schema), ])); // Skipping happens in several steps: @@ -106,20 +133,20 @@ impl DataSkippingFilter { // // 3. The selection evaluator does DISTINCT(col(predicate), 'false') to produce true (= keep) when // the predicate is true/null and false (= skip) when the predicate is false. - let select_stats_evaluator = engine.get_expression_handler().get_evaluator( + let select_stats_evaluator = engine.evaluation_handler().new_expression_evaluator( // safety: kernel is very broken if we don't have the schema for Add actions get_log_add_schema().clone(), STATS_EXPR.clone(), DataType::STRING, ); - let skipping_evaluator = engine.get_expression_handler().get_evaluator( + let skipping_evaluator = engine.evaluation_handler().new_expression_evaluator( stats_schema.clone(), Expr::struct_from([as_sql_data_skipping_predicate(&predicate)?]), PREDICATE_SCHEMA.clone(), ); - let filter_evaluator = engine.get_expression_handler().get_evaluator( + let filter_evaluator = engine.evaluation_handler().new_expression_evaluator( stats_schema.clone(), FILTER_EXPR.clone(), DataType::BOOLEAN, @@ -130,7 +157,7 @@ impl DataSkippingFilter { select_stats_evaluator, skipping_evaluator, filter_evaluator, - json_handler: engine.get_json_handler(), + json_handler: engine.json_handler(), }) } @@ -213,11 +240,11 @@ impl DataSkippingPredicateEvaluator for DataSkippingPredicateCreator { } fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted).map(Expr::literal) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted).map(Expr::literal) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted).map(Expr::literal) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted).map(Expr::literal) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -235,7 +262,7 @@ impl DataSkippingPredicateEvaluator for DataSkippingPredicateCreator { right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + 
KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) .map(Expr::literal) } diff --git a/kernel/src/scan/data_skipping/tests.rs b/kernel/src/scan/data_skipping/tests.rs index 2ca7a0c01..4cac4e64a 100644 --- a/kernel/src/scan/data_skipping/tests.rs +++ b/kernel/src/scan/data_skipping/tests.rs @@ -1,7 +1,7 @@ use super::*; use crate::expressions::column_name; -use crate::predicates::{DefaultPredicateEvaluator, UnimplementedColumnResolver}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, UnimplementedColumnResolver}; use std::collections::HashMap; const TRUE: Option = Some(true); @@ -32,7 +32,7 @@ fn test_eval_is_null() { (column_name!("numRecords"), Scalar::from(2i64)), (column_name!("nullCount.x"), Scalar::from(nullcount)), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -75,7 +75,7 @@ fn test_eval_binary_comparisons() { (column_name!("minValues.x"), min.clone()), (column_name!("maxValues.x"), max.clone()), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected.iter()) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -149,7 +149,7 @@ fn test_eval_variadic() { (&[NULL, TRUE, FALSE], FALSE, TRUE), (&[NULL, FALSE, TRUE], FALSE, TRUE), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (inputs, expect_and, expect_or) in test_cases { let inputs: Vec<_> = inputs .iter() @@ -214,7 +214,7 @@ fn test_eval_distinct() { (column_name!("minValues.x"), min.clone()), (column_name!("maxValues.x"), max.clone()), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -286,7 +286,7 @@ fn test_sql_where() { (column_name!("maxValues.x"), max.clone()), ]) }; - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( filter.eval_expr(&pred, false), diff --git a/kernel/src/scan/log_replay.rs b/kernel/src/scan/log_replay.rs index 177996a80..4ae6d28a5 100644 --- a/kernel/src/scan/log_replay.rs +++ b/kernel/src/scan/log_replay.rs @@ -3,39 +3,74 @@ use std::collections::{HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use itertools::Itertools; -use tracing::debug; use super::data_skipping::DataSkippingFilter; -use super::{ScanData, Transform}; +use super::{ScanMetadata, Transform}; use crate::actions::get_log_add_schema; use crate::engine_data::{GetData, RowVisitor, TypedGetData as _}; use crate::expressions::{column_expr, column_name, ColumnName, Expression, ExpressionRef}; -use crate::scan::{DeletionVectorDescriptor, TransformExpr}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, KernelPredicateEvaluator as _}; +use crate::log_replay::{FileActionDeduplicator, FileActionKey, LogReplayProcessor}; +use crate::scan::{Scalar, TransformExpr}; use crate::schema::{ColumnNamesAndTypes, DataType, MapType, SchemaRef, StructField, StructType}; use crate::utils::require; use 
crate::{DeltaResult, Engine, EngineData, Error, ExpressionEvaluator}; -/// The subset of file action fields that uniquely identifies it in the log, used for deduplication -/// of adds and removes during log replay. -#[derive(Debug, Hash, Eq, PartialEq)] -struct FileActionKey { - path: String, - dv_unique_id: Option, -} -impl FileActionKey { - fn new(path: impl Into, dv_unique_id: Option) -> Self { - let path = path.into(); - Self { path, dv_unique_id } - } -} - -struct LogReplayScanner { - filter: Option, - +/// [`ScanLogReplayProcessor`] performs log replay (processes actions) specifically for doing a table scan. +/// +/// During a table scan, the processor reads batches of log actions (in reverse chronological order) +/// and performs the following steps: +/// +/// - Data Skipping: Applies a predicate-based filter (via [`DataSkippingFilter`]) to quickly skip +/// files that are irrelevant for the query. +/// - Partition Pruning: Uses an optional partition filter (extracted from a physical predicate) +/// to exclude actions whose partition values do not meet the required criteria. +/// - Action Deduplication: Leverages the [`FileActionDeduplicator`] to ensure that for each unique file +/// (identified by its path and deletion vector unique ID), only the latest valid Add action is processed. +/// - Transformation: Applies a built-in transformation (`add_transform`) to convert selected Add actions +/// into [`ScanMetadata`], the intermediate format passed to the engine. +/// - Row Transform Passthrough: Any user-provided row-level transformation expressions (e.g. those derived +/// from projection or filters) are preserved and passed through to the engine, which applies them as part +/// of its scan execution logic. +/// +/// As an implementation of [`LogReplayProcessor`], [`ScanLogReplayProcessor`] provides the +/// `process_actions_batch` method, which applies these steps to each batch of log actions and +/// produces a [`ScanMetadata`] result. This result includes the transformed batch, a selection +/// vector indicating which rows are valid, and any row-level transformation expressions that need +/// to be applied to the selected rows. +struct ScanLogReplayProcessor { + partition_filter: Option, + data_skipping_filter: Option, + add_transform: Arc, + logical_schema: SchemaRef, + transform: Option>, /// A set of (data file path, dv_unique_id) pairs that have been seen thus /// far in the log. This is used to filter out files with Remove actions as /// well as duplicate entries in the log. - seen: HashSet, + seen_file_keys: HashSet, +} + +impl ScanLogReplayProcessor { + /// Create a new [`ScanLogReplayProcessor`] instance + fn new( + engine: &dyn Engine, + physical_predicate: Option<(ExpressionRef, SchemaRef)>, + logical_schema: SchemaRef, + transform: Option>, + ) -> Self { + Self { + partition_filter: physical_predicate.as_ref().map(|(e, _)| e.clone()), + data_skipping_filter: DataSkippingFilter::new(engine, physical_predicate), + add_transform: engine.evaluation_handler().new_expression_evaluator( + get_log_add_schema().clone(), + get_add_transform_expr(), + SCAN_ROW_DATATYPE.clone(), + ), + seen_file_keys: Default::default(), + logical_schema, + transform, + } + } } /// A visitor that deduplicates a stream of add and remove actions into a stream of valid adds. Log @@ -43,68 +78,96 @@ struct LogReplayScanner { /// pair, we should ignore all subsequent (older) actions for that same (path, dvId) pair. 
If the /// first action for a given file is a remove, then that file does not show up in the result at all. struct AddRemoveDedupVisitor<'seen> { - seen: &'seen mut HashSet, + deduplicator: FileActionDeduplicator<'seen>, selection_vector: Vec, logical_schema: SchemaRef, transform: Option>, + partition_filter: Option, row_transform_exprs: Vec>, - is_log_batch: bool, } impl AddRemoveDedupVisitor<'_> { - /// Checks if log replay already processed this logical file (in which case the current action - /// should be ignored). If not already seen, register it so we can recognize future duplicates. - /// Returns `true` if we have seen the file and should ignore it, `false` if we have not seen it - /// and should process it. - fn check_and_record_seen(&mut self, key: FileActionKey) -> bool { - // Note: each (add.path + add.dv_unique_id()) pair has a - // unique Add + Remove pair in the log. For example: - // https://github.com/delta-io/delta/blob/master/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json - - if self.seen.contains(&key) { - debug!( - "Ignoring duplicate ({}, {:?}) in scan, is log {}", - key.path, key.dv_unique_id, self.is_log_batch - ); - true - } else { - debug!( - "Including ({}, {:?}) in scan, is log {}", - key.path, key.dv_unique_id, self.is_log_batch - ); - if self.is_log_batch { - // Remember file actions from this batch so we can ignore duplicates as we process - // batches from older commit and/or checkpoint files. We don't track checkpoint - // batches because they are already the oldest actions and never replace anything. - self.seen.insert(key); - } - false + // These index positions correspond to the order of columns defined in + // `selected_column_names_and_types()` + const ADD_PATH_INDEX: usize = 0; // Position of "add.path" in getters + const ADD_PARTITION_VALUES_INDEX: usize = 1; // Position of "add.partitionValues" in getters + const ADD_DV_START_INDEX: usize = 2; // Start position of add deletion vector columns + const REMOVE_PATH_INDEX: usize = 5; // Position of "remove.path" in getters + const REMOVE_DV_START_INDEX: usize = 6; // Start position of remove deletion vector columns + + fn new( + seen: &mut HashSet, + selection_vector: Vec, + logical_schema: SchemaRef, + transform: Option>, + partition_filter: Option, + is_log_batch: bool, + ) -> AddRemoveDedupVisitor<'_> { + AddRemoveDedupVisitor { + deduplicator: FileActionDeduplicator::new( + seen, + is_log_batch, + Self::ADD_PATH_INDEX, + Self::REMOVE_PATH_INDEX, + Self::ADD_DV_START_INDEX, + Self::REMOVE_DV_START_INDEX, + ), + selection_vector, + logical_schema, + transform, + partition_filter, + row_transform_exprs: Vec::new(), } } + fn parse_partition_value( + &self, + field_idx: usize, + partition_values: &HashMap, + ) -> DeltaResult<(usize, (String, Scalar))> { + let field = self.logical_schema.fields.get_index(field_idx); + let Some((_, field)) = field else { + return Err(Error::InternalError(format!( + "out of bounds partition column field index {field_idx}" + ))); + }; + let name = field.physical_name(); + let partition_value = + super::parse_partition_value(partition_values.get(name), field.data_type())?; + Ok((field_idx, (name.to_string(), partition_value))) + } + + fn parse_partition_values( + &self, + transform: &Transform, + partition_values: &HashMap, + ) -> DeltaResult> { + transform + .iter() + .filter_map(|transform_expr| match transform_expr { + TransformExpr::Partition(field_idx) => { + Some(self.parse_partition_value(*field_idx, partition_values)) + } + 
TransformExpr::Static(_) => None, + }) + .try_collect() + } + /// Compute an expression that will transform from physical to logical for a given Add file action - fn get_transform_expr<'a>( + fn get_transform_expr( &self, - i: usize, transform: &Transform, - getters: &[&'a dyn GetData<'a>], + mut partition_values: HashMap, ) -> DeltaResult { - let partition_values: HashMap<_, _> = getters[1].get(i, "add.partitionValues")?; let transforms = transform .iter() .map(|transform_expr| match transform_expr { TransformExpr::Partition(field_idx) => { - let field = self.logical_schema.fields.get_index(*field_idx); - let Some((_, field)) = field else { - return Err(Error::Generic( - format!("logical schema did not contain expected field at {field_idx}, can't transform data") - )); + let Some((_, partition_value)) = partition_values.remove(field_idx) else { + return Err(Error::InternalError(format!( + "missing partition value for field index {field_idx}" + ))); }; - let name = field.physical_name(); - let partition_value = super::parse_partition_value( - partition_values.get(name), - field.data_type(), - )?; Ok(partition_value.into()) } TransformExpr::Static(field_expr) => Ok(field_expr.clone()), @@ -113,40 +176,69 @@ impl AddRemoveDedupVisitor<'_> { Ok(Arc::new(Expression::Struct(transforms))) } + fn is_file_partition_pruned( + &self, + partition_values: &HashMap, + ) -> bool { + if partition_values.is_empty() { + return false; + } + let Some(partition_filter) = &self.partition_filter else { + return false; + }; + let partition_values: HashMap<_, _> = partition_values + .values() + .map(|(k, v)| (ColumnName::new([k]), v.clone())) + .collect(); + let evaluator = DefaultKernelPredicateEvaluator::from(partition_values); + evaluator.eval_sql_where(partition_filter) == Some(false) + } + /// True if this row contains an Add action that should survive log replay. Skip it if the row /// is not an Add action, or the file has already been seen previously. fn is_valid_add<'a>(&mut self, i: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult { - // Add will have a path at index 0 if it is valid; otherwise, if it is a log batch, we may - // have a remove with a path at index 4. In either case, extract the three dv getters at - // indexes that immediately follow a valid path index. - let (path, dv_getters, is_add) = if let Some(path) = getters[0].get_str(i, "add.path")? { - (path, &getters[2..5], true) - } else if !self.is_log_batch { - return Ok(false); - } else if let Some(path) = getters[5].get_opt(i, "remove.path")? { - (path, &getters[6..9], false) - } else { + // When processing file actions, we extract path and deletion vector information based on action type: + // - For Add actions: path is at index 0, followed by DV fields at indexes 2-4 + // - For Remove actions (in log batches only): path is at index 5, followed by DV fields at indexes 6-8 + // The file extraction logic selects the appropriate indexes based on whether we found a valid path. + // Remove getters are not included when visiting a non-log batch (checkpoint batch), so do + // not try to extract remove actions in that case. + let Some((file_key, is_add)) = self.deduplicator.extract_file_action( + i, + getters, + !self.deduplicator.is_log_batch(), // skip_removes. true if this is a checkpoint batch + )? + else { return Ok(false); }; - let dv_unique_id = match dv_getters[0].get_opt(i, "deletionVector.storageType")? 
{ - Some(storage_type) => Some(DeletionVectorDescriptor::unique_id_from_parts( - storage_type, - dv_getters[1].get(i, "deletionVector.pathOrInlineDv")?, - dv_getters[2].get_opt(i, "deletionVector.offset")?, - )), - None => None, + // Apply partition pruning (to adds only) before deduplication, so that we don't waste memory + // tracking pruned files. Removes don't get pruned and we'll still have to track them. + // + // WARNING: It's not safe to partition-prune removes (just like it's not safe to data skip + // removes), because they are needed to suppress earlier incompatible adds we might + // encounter if the table's schema was replaced after the most recent checkpoint. + let partition_values = match &self.transform { + Some(transform) if is_add => { + let partition_values = + getters[Self::ADD_PARTITION_VALUES_INDEX].get(i, "add.partitionValues")?; + let partition_values = self.parse_partition_values(transform, &partition_values)?; + if self.is_file_partition_pruned(&partition_values) { + return Ok(false); + } + partition_values + } + _ => Default::default(), }; // Check both adds and removes (skipping already-seen), but only transform and return adds - let file_key = FileActionKey::new(path, dv_unique_id); - if self.check_and_record_seen(file_key) || !is_add { + if self.deduplicator.check_and_record_seen(file_key) || !is_add { return Ok(false); } let transform = self .transform .as_ref() - .map(|transform| self.get_transform_expr(i, transform, getters)) + .map(|transform| self.get_transform_expr(transform, partition_values)) .transpose()?; if transform.is_some() { // fill in any needed `None`s for previous rows @@ -179,7 +271,7 @@ impl RowVisitor for AddRemoveDedupVisitor<'_> { (names, types).into() }); let (names, types) = NAMES_AND_TYPES.as_ref(); - if self.is_log_batch { + if self.deduplicator.is_log_batch() { (names, types) } else { // All checkpoint actions are already reconciled and Remove actions in checkpoint files @@ -189,7 +281,8 @@ impl RowVisitor for AddRemoveDedupVisitor<'_> { } fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()> { - let expected_getters = if self.is_log_batch { 9 } else { 5 }; + let is_log_batch = self.deduplicator.is_log_batch(); + let expected_getters = if is_log_batch { 9 } else { 5 }; require!( getters.len() == expected_getters, Error::InternalError(format!( @@ -246,45 +339,41 @@ fn get_add_transform_expr() -> Expression { ]) } -impl LogReplayScanner { - /// Create a new [`LogReplayScanner`] instance - fn new(engine: &dyn Engine, physical_predicate: Option<(ExpressionRef, SchemaRef)>) -> Self { - Self { - filter: DataSkippingFilter::new(engine, physical_predicate), - seen: Default::default(), - } - } +impl LogReplayProcessor for ScanLogReplayProcessor { + type Output = ScanMetadata; - fn process_scan_batch( + fn process_actions_batch( &mut self, - add_transform: &dyn ExpressionEvaluator, - actions: &dyn EngineData, - logical_schema: SchemaRef, - transform: Option>, + actions_batch: &dyn EngineData, is_log_batch: bool, - ) -> DeltaResult { - // Apply data skipping to get back a selection vector for actions that passed skipping. We - // will update the vector below as log replay identifies duplicates that should be ignored. 
- let selection_vector = match &self.filter { - Some(filter) => filter.apply(actions)?, - None => vec![true; actions.len()], - }; - assert_eq!(selection_vector.len(), actions.len()); - - let mut visitor = AddRemoveDedupVisitor { - seen: &mut self.seen, + ) -> DeltaResult { + // Build an initial selection vector for the batch which has had the data skipping filter + // applied. The selection vector is further updated by the deduplication visitor to remove + // rows that are not valid adds. + let selection_vector = self.build_selection_vector(actions_batch)?; + assert_eq!(selection_vector.len(), actions_batch.len()); + + let mut visitor = AddRemoveDedupVisitor::new( + &mut self.seen_file_keys, selection_vector, - logical_schema, - transform, - row_transform_exprs: Vec::new(), + self.logical_schema.clone(), + self.transform.clone(), + self.partition_filter.clone(), is_log_batch, - }; - visitor.visit_rows_of(actions)?; + ); + visitor.visit_rows_of(actions_batch)?; // TODO: Teach expression eval to respect the selection vector we just computed so carefully! - let selection_vector = visitor.selection_vector; - let result = add_transform.evaluate(actions)?; - Ok((result, selection_vector, visitor.row_transform_exprs)) + let result = self.add_transform.evaluate(actions_batch)?; + Ok(ScanMetadata::new( + result, + visitor.selection_vector, + visitor.row_transform_exprs, + )) + } + + fn data_skipping_filter(&self) -> Option<&DataSkippingFilter> { + self.data_skipping_filter.as_ref() } } @@ -298,31 +387,16 @@ pub(crate) fn scan_action_iter( logical_schema: SchemaRef, transform: Option>, physical_predicate: Option<(ExpressionRef, SchemaRef)>, -) -> impl Iterator> { - let mut log_scanner = LogReplayScanner::new(engine, physical_predicate); - let add_transform = engine.get_expression_handler().get_evaluator( - get_log_add_schema().clone(), - get_add_transform_expr(), - SCAN_ROW_DATATYPE.clone(), - ); - action_iter - .map(move |action_res| { - let (batch, is_log_batch) = action_res?; - log_scanner.process_scan_batch( - add_transform.as_ref(), - batch.as_ref(), - logical_schema.clone(), - transform.clone(), - is_log_batch, - ) - }) - .filter(|res| res.as_ref().map_or(true, |(_, sv, _)| sv.contains(&true))) +) -> impl Iterator> { + ScanLogReplayProcessor::new(engine, physical_predicate, logical_schema, transform) + .process_actions_iter(action_iter) } #[cfg(test)] mod tests { use std::{collections::HashMap, sync::Arc}; + use crate::actions::get_log_schema; use crate::expressions::{column_name, Scalar}; use crate::scan::state::{DvInfo, Stats}; use crate::scan::test_utils::{ @@ -364,7 +438,7 @@ mod tests { #[test] fn test_scan_action_iter() { run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], @@ -376,7 +450,7 @@ mod tests { #[test] fn test_scan_action_iter_with_remove() { run_with_validate_callback( - vec![add_batch_with_remove()], + vec![add_batch_with_remove(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[false, false, true, false], @@ -387,7 +461,7 @@ mod tests { #[test] fn test_no_transforms() { - let batch = vec![add_batch_simple()]; + let batch = vec![add_batch_simple(get_log_schema().clone())]; let logical_schema = Arc::new(crate::schema::StructType::new(vec![])); let iter = scan_action_iter( &SyncEngine::new(), @@ -397,8 +471,11 @@ mod tests { None, ); for res in iter { - let (_batch, _sel, transforms) = res.unwrap(); - 
assert!(transforms.is_empty(), "Should have no transforms"); + let scan_metadata = res.unwrap(); + assert!( + scan_metadata.scan_file_transforms.is_empty(), + "Should have no transforms" + ); } } @@ -443,7 +520,8 @@ mod tests { } for res in iter { - let (_batch, _sel, transforms) = res.unwrap(); + let scan_metadata = res.unwrap(); + let transforms = scan_metadata.scan_file_transforms; // in this case we have a metadata action first and protocol 3rd, so we expect 4 items, // the first and 3rd being a `None` assert_eq!(transforms.len(), 4, "Should have 4 transforms"); diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 14e2ee50f..1a9412089 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -11,9 +11,11 @@ use url::Url; use crate::actions::deletion_vector::{ deletion_treemap_to_bools, split_vector, DeletionVectorDescriptor, }; -use crate::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use crate::actions::{get_log_schema, ADD_NAME, REMOVE_NAME, SIDECAR_NAME}; +use crate::engine_data::FilteredEngineData; use crate::expressions::{ColumnName, Expression, ExpressionRef, ExpressionTransform, Scalar}; -use crate::predicates::{DefaultPredicateEvaluator, EmptyColumnResolver}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, EmptyColumnResolver}; +use crate::log_replay::HasSelectionVector; use crate::scan::state::{DvInfo, Stats}; use crate::schema::{ ArrayType, DataType, MapType, PrimitiveType, Schema, SchemaRef, SchemaTransform, StructField, @@ -96,9 +98,7 @@ impl ScanBuilder { /// perform actual data reads. pub fn build(self) -> DeltaResult { // if no schema is provided, use snapshot's entire schema (e.g. SELECT *) - let logical_schema = self - .schema - .unwrap_or_else(|| self.snapshot.schema().clone().into()); + let logical_schema = self.schema.unwrap_or_else(|| self.snapshot.schema()); let state_info = get_state_info( logical_schema.as_ref(), &self.snapshot.metadata().partition_columns, @@ -184,8 +184,9 @@ impl PhysicalPredicate { // the predicate allows to statically skip all files. Since this is direct evaluation (not an // expression rewrite), we use a `DefaultPredicateEvaluator` with an empty column resolver. fn can_statically_skip_all_files(predicate: &Expression) -> bool { - use crate::predicates::PredicateEvaluator as _; - DefaultPredicateEvaluator::from(EmptyColumnResolver).eval_sql_where(predicate) == Some(false) + use crate::kernel_predicates::KernelPredicateEvaluator as _; + DefaultKernelPredicateEvaluator::from(EmptyColumnResolver).eval_sql_where(predicate) + == Some(false) } // Build the stats read schema filtering the table schema to keep only skipping-eligible @@ -322,9 +323,48 @@ pub(crate) enum TransformExpr { Partition(usize), } -// TODO(nick): Make this a struct in a follow-on PR -// (data, deletion_vec, transforms) -pub type ScanData = (Box, Vec, Vec>); +/// [`ScanMetadata`] contains (1) a batch of [`FilteredEngineData`] specifying data files to be scanned +/// and (2) a vector of transforms (one transform per scan file) that must be applied to the data read +/// from those files. +pub struct ScanMetadata { + /// Filtered engine data with one row per file to scan (and only selected rows should be scanned) + pub scan_files: FilteredEngineData, + + /// Row-level transformations to apply to data read from files. + /// + /// Each entry in this vector corresponds to a row in the `scan_files` data. 
The entry is an + /// optional expression that must be applied to convert the file's data into the logical schema + /// expected by the scan: + /// + /// - `Some(expr)`: Apply this expression to transform the data to match [`Scan::schema()`]. + /// - `None`: No transformation is needed; the data is already in the correct logical form. + /// + /// Note: This vector can be indexed by row number, as rows masked by the selection vector will + /// have corresponding entries that will be `None`. + pub scan_file_transforms: Vec>, +} + +impl ScanMetadata { + fn new( + data: Box, + selection_vector: Vec, + scan_file_transforms: Vec>, + ) -> Self { + Self { + scan_files: FilteredEngineData { + data, + selection_vector, + }, + scan_file_transforms, + } + } +} + +impl HasSelectionVector for ScanMetadata { + fn has_selected_rows(&self) -> bool { + self.scan_files.selection_vector.contains(&true) + } +} /// The result of building a scan over a table. This can be used to get the actual data from /// scanning the table. @@ -378,9 +418,9 @@ impl Scan { .collect() } - /// Get an iterator of [`EngineData`]s that should be included in scan for a query. This handles - /// log-replay, reconciling Add and Remove actions, and applying data skipping (if - /// possible). Each item in the returned iterator is a tuple of: + /// Get an iterator of [`ScanMetadata`]s that should be used to facilitate a scan. This handles + /// log-replay, reconciling Add and Remove actions, and applying data skipping (if possible). + /// Each item in the returned iterator is a struct of: /// - `Box`: Data in engine format, where each row represents a file to be /// scanned. The schema for each row can be obtained by calling [`scan_row_schema`]. /// - `Vec`: A selection vector. If a row is at index `i` and this vector is `false` at @@ -391,22 +431,22 @@ impl Scan { /// `filter_record_batch`, you _need_ to extend this vector to the full length of the batch or /// arrow will drop the extra rows. /// - `Vec>`: Transformation expressions that need to be applied. For each - /// row at index `i` in the above data, if an expression exists at index `i` in the `Vec`, - /// the associated expression _must_ be applied to the data read from the file specified by - /// the row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If - /// the item at index `i` in this `Vec` is `None`, or if the `Vec` contains fewer than `i` - /// elements, no expression need be applied and the data read from disk is already in the - /// correct logical state. - pub fn scan_data( + /// row at index `i` in the above data, if an expression exists at index `i` in the `Vec`, + /// the associated expression _must_ be applied to the data read from the file specified by + /// the row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If + /// the item at index `i` in this `Vec` is `None`, or if the `Vec` contains fewer than `i` + /// elements, no expression need be applied and the data read from disk is already in the + /// correct logical state. + pub fn scan_metadata( &self, engine: &dyn Engine, - ) -> DeltaResult>> { + ) -> DeltaResult>> { // Compute the static part of the transformation. 
This is `None` if no transformation is // needed (currently just means no partition cols AND no column mapping but will be extended // for other transforms as we support them) let static_transform = (self.have_partition_cols || self.snapshot.column_mapping_mode() != ColumnMappingMode::None) - .then_some(Arc::new(Scan::get_static_transform(&self.all_fields))); + .then(|| Arc::new(Scan::get_static_transform(&self.all_fields))); let physical_predicate = match self.physical_predicate.clone() { PhysicalPredicate::StaticSkipAll => return Ok(None.into_iter().flatten()), PhysicalPredicate::Some(predicate, schema) => Some((predicate, schema)), @@ -414,7 +454,7 @@ impl Scan { }; let it = scan_action_iter( engine, - self.replay_for_scan_data(engine)?, + self.replay_for_scan_metadata(engine)?, self.logical_schema.clone(), static_transform, physical_predicate, @@ -423,18 +463,21 @@ impl Scan { } // Factored out to facilitate testing - fn replay_for_scan_data( + fn replay_for_scan_metadata( &self, engine: &dyn Engine, ) -> DeltaResult, bool)>> + Send> { let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); + let checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; // NOTE: We don't pass any meta-predicate because we expect no meaningful row group skipping // when ~every checkpoint file will contain the adds and removes we are looking for. - self.snapshot - .log_segment() - .replay(engine, commit_read_schema, checkpoint_read_schema, None) + self.snapshot.log_segment().read_actions( + engine, + commit_read_schema, + checkpoint_read_schema, + None, + ) } /// Get global state that is valid for the entire scan. This is somewhat expensive so should @@ -448,14 +491,14 @@ impl Scan { } } - /// Perform an "all in one" scan. This will use the provided `engine` to read and - /// process all the data for the query. Each [`ScanResult`] in the resultant iterator encapsulates - /// the raw data and an optional boolean vector built from the deletion vector if it was - /// present. See the documentation for [`ScanResult`] for more details. Generally - /// connectors/engines will want to use [`Scan::scan_data`] so they can have more control over - /// the execution of the scan. - // This calls [`Scan::scan_data`] to get an iterator of `ScanData` actions for the scan, and then uses the - // `engine`'s [`crate::ParquetHandler`] to read the actual table data. + /// Perform an "all in one" scan. This will use the provided `engine` to read and process all + /// the data for the query. Each [`ScanResult`] in the resultant iterator encapsulates the raw + /// data and an optional boolean vector built from the deletion vector if it was present. See + /// the documentation for [`ScanResult`] for more details. Generally connectors/engines will + /// want to use [`Scan::scan_metadata`] so they can have more control over the execution of the + /// scan. + // This calls [`Scan::scan_metadata`] to get an iterator of `ScanMetadata` actions for the scan, + // and then uses the `engine`'s [`crate::ParquetHandler`] to read the actual table data. 
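// Illustrative sketch (not a signature from this patch): how a connector might drive the renamed
// `scan_metadata` / `visit_scan_files` API directly instead of calling `execute`. The names
// `my_engine` and `collect_path` are hypothetical; `collect_path` stands in for a `ScanCallback`
// (see kernel/src/scan/state.rs) that pushes each file `path` into the context vector. The visitor
// honors `scan_metadata.scan_files.selection_vector`, so rows masked out there are skipped.
//
// ```ignore
// let scan = snapshot.into_scan_builder().build()?;
// let mut paths: Vec<String> = vec![];
// for res in scan.scan_metadata(my_engine.as_ref())? {
//     let scan_metadata = res?;
//     paths = scan_metadata.visit_scan_files(paths, collect_path)?;
// }
// ```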
pub fn execute( &self, engine: Arc, @@ -466,7 +509,7 @@ impl Scan { dv_info: DvInfo, transform: Option, } - fn scan_data_callback( + fn scan_metadata_callback( batches: &mut Vec, path: &str, size: i64, @@ -492,18 +535,12 @@ impl Scan { let table_root = self.snapshot.table_root().clone(); let physical_predicate = self.physical_predicate(); - let scan_data = self.scan_data(engine.as_ref())?; - let scan_files_iter = scan_data + let scan_metadata_iter = self.scan_metadata(engine.as_ref())?; + let scan_files_iter = scan_metadata_iter .map(|res| { - let (data, vec, transforms) = res?; + let scan_metadata = res?; let scan_files = vec![]; - state::visit_scan_files( - data.as_ref(), - &vec, - &transforms, - scan_files, - scan_data_callback, - ) + scan_metadata.visit_scan_files(scan_files, scan_metadata_callback) }) // Iterator>> to Iterator> .flatten_ok(); @@ -525,7 +562,7 @@ impl Scan { // partition columns, but the read schema we use here does _NOT_ include partition // columns. So we cannot safely assume that all column references are valid. See // https://github.com/delta-io/delta-kernel-rs/issues/434 for more details. - let read_result_iter = engine.get_parquet_handler().read_parquet_files( + let read_result_iter = engine.parquet_handler().read_parquet_files( &[meta], global_state.physical_schema.clone(), physical_predicate.clone(), @@ -567,7 +604,7 @@ impl Scan { } } -/// Get the schema that scan rows (from [`Scan::scan_data`]) will be returned with. +/// Get the schema that scan rows (from [`Scan::scan_metadata`]) will be returned with. /// /// It is: /// ```ignored @@ -655,19 +692,19 @@ pub fn selection_vector( descriptor: &DeletionVectorDescriptor, table_root: &Url, ) -> DeltaResult> { - let fs_client = engine.get_file_system_client(); - let dv_treemap = descriptor.read(fs_client, table_root)?; + let storage = engine.storage_handler(); + let dv_treemap = descriptor.read(storage, table_root)?; Ok(deletion_treemap_to_bools(dv_treemap)) } // some utils that are used in file_stream.rs and state.rs tests #[cfg(test)] pub(crate) mod test_utils { + use crate::arrow::array::StringArray; + use crate::utils::test_utils::string_array_to_engine_data; + use itertools::Itertools; use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; - use crate::{ actions::get_log_schema, engine::{ @@ -676,37 +713,59 @@ pub(crate) mod test_utils { }, scan::log_replay::scan_action_iter, schema::SchemaRef, - EngineData, JsonHandler, + JsonHandler, }; use super::{state::ScanCallback, Transform}; - // TODO(nick): Merge all copies of this into one "test utils" thing - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) + // Generates a batch of sidecar actions with the given paths. + // The schema is provided as null columns affect equality checks. 
+ pub(crate) fn sidecar_batch_with_given_paths( + paths: Vec<&str>, + output_schema: SchemaRef, + ) -> Box { + let handler = SyncJsonHandler {}; + + let mut json_strings: Vec = paths + .iter() + .map(|path| { + format!( + r#"{{"sidecar":{{"path":"{path}","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{{"tag_foo":"tag_bar"}}}}}}"# + ) + }) + .collect(); + json_strings.push(r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#.to_string()); + + let json_strings_array: StringArray = + json_strings.iter().map(|s| s.as_str()).collect_vec().into(); + + let parsed = handler + .parse_json( + string_array_to_engine_data(json_strings_array), + output_schema, + ) + .unwrap(); + + ArrowEngineData::try_from_engine_data(parsed).unwrap() } - // simple add - pub(crate) fn add_batch_simple() -> Box { + // Generates a batch with an add action. + // The schema is provided as null columns affect equality checks. + pub(crate) fn add_batch_simple(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues": {"date": "2017-12-10"},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); ArrowEngineData::try_from_engine_data(parsed).unwrap() } - // add batch with a removed file - pub(crate) fn add_batch_with_remove() -> Box { + // An add batch with a removed file parsed with the schema provided + pub(crate) fn add_batch_with_remove(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"remove":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c001.snappy.parquet","deletionTimestamp":1677811194426,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":635,"tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, @@ -715,7 +774,6 @@ pub(crate) mod test_utils { 
r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); @@ -761,16 +819,11 @@ pub(crate) mod test_utils { ); let mut batch_count = 0; for res in iter { - let (batch, sel, transforms) = res.unwrap(); - assert_eq!(sel, expected_sel_vec); - crate::scan::state::visit_scan_files( - batch.as_ref(), - &sel, - &transforms, - context.clone(), - validate_callback, - ) - .unwrap(); + let scan_metadata = res.unwrap(); + assert_eq!(scan_metadata.scan_files.selection_vector, expected_sel_vec); + scan_metadata + .visit_scan_files(context.clone(), validate_callback) + .unwrap(); batch_count += 1; } assert_eq!(batch_count, 1); @@ -959,8 +1012,8 @@ mod tests { } fn get_files_for_scan(scan: Scan, engine: &dyn Engine) -> DeltaResult> { - let scan_data = scan.scan_data(engine)?; - fn scan_data_callback( + let scan_metadata_iter = scan.scan_metadata(engine)?; + fn scan_metadata_callback( paths: &mut Vec, path: &str, _size: i64, @@ -973,21 +1026,15 @@ mod tests { assert!(dv_info.deletion_vector.is_none()); } let mut files = vec![]; - for data in scan_data { - let (data, vec, transforms) = data?; - files = state::visit_scan_files( - data.as_ref(), - &vec, - &transforms, - files, - scan_data_callback, - )?; + for res in scan_metadata_iter { + let scan_metadata = res?; + files = scan_metadata.visit_scan_files(files, scan_metadata_callback)?; } Ok(files) } #[test] - fn test_scan_data_paths() { + fn test_scan_metadata_paths() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/table-without-dv-small/")).unwrap(); let url = url::Url::from_directory_path(path).unwrap(); @@ -1005,7 +1052,7 @@ mod tests { } #[test_log::test] - fn test_scan_data() { + fn test_scan_metadata() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/table-without-dv-small/")).unwrap(); let url = url::Url::from_directory_path(path).unwrap(); @@ -1066,7 +1113,7 @@ mod tests { } #[test] - fn test_replay_for_scan_data() { + fn test_replay_for_scan_metadata() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/parquet_row_group_skipping/")); let url = url::Url::from_directory_path(path.unwrap()).unwrap(); let engine = SyncEngine::new(); @@ -1075,7 +1122,7 @@ mod tests { let snapshot = table.snapshot(&engine, None).unwrap(); let scan = snapshot.into_scan_builder().build().unwrap(); let data: Vec<_> = scan - .replay_for_scan_data(&engine) + .replay_for_scan_metadata(&engine) .unwrap() .try_collect() .unwrap(); diff --git a/kernel/src/scan/state.rs b/kernel/src/scan/state.rs index 85eb6e4a7..b04518026 100644 --- a/kernel/src/scan/state.rs +++ b/kernel/src/scan/state.rs @@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize}; use tracing::warn; use super::log_replay::SCAN_ROW_SCHEMA; +use super::ScanMetadata; /// State that doesn't change between scans #[derive(Clone, Debug, Serialize, Deserialize)] @@ -68,8 +69,8 @@ impl DvInfo { self.deletion_vector .as_ref() .map(|dv_descriptor| { - let fs_client = engine.get_file_system_client(); - dv_descriptor.read(fs_client, table_root) + let storage = engine.storage_handler(); + dv_descriptor.read(storage, table_root) }) .transpose() } @@ 
-92,8 +93,8 @@ impl DvInfo { self.deletion_vector .as_ref() .map(|dv| { - let fs_client = engine.get_file_system_client(); - dv.row_indexes(fs_client, table_root) + let storage = engine.storage_handler(); + dv.row_indexes(storage, table_root) }) .transpose() } @@ -110,8 +111,8 @@ pub fn transform_to_logical( ) -> DeltaResult> { match transform { Some(ref transform) => engine - .get_expression_handler() - .get_evaluator( + .evaluation_handler() + .new_expression_evaluator( physical_schema.clone(), transform.as_ref().clone(), // TODO: Maybe eval should take a ref logical_schema.clone().into(), @@ -135,12 +136,13 @@ pub type ScanCallback = fn( /// scan. /// /// The arguments to the callback are: -/// * `context`: an `&mut context` argument. this can be anything that engine needs to pass through to each call +/// * `context`: an `&mut context` argument. this can be anything that engine needs to pass through +/// to each call /// * `path`: a `&str` which is the path to the file /// * `size`: an `i64` which is the size of the file /// * `dv_info`: a [`DvInfo`] struct, which allows getting the selection vector for this file -/// * `transform`: An optional expression that, if present, _must_ be applied to physical data to convert it to -/// the correct logical format +/// * `transform`: An optional expression that, if present, _must_ be applied to physical data to +/// convert it to the correct logical format /// * `partition_values`: a `HashMap` which are partition values /// /// ## Context @@ -151,33 +153,26 @@ pub type ScanCallback = fn( /// ## Example /// ```ignore /// let mut context = [my context]; -/// for res in scan_data { // scan data from scan.scan_data() -/// let (data, vector) = res?; -/// context = delta_kernel::scan::state::visit_scan_files( -/// data.as_ref(), -/// selection_vector, +/// for res in scan_metadata_iter { // scan metadata iterator from scan.scan_metadata() +/// let scan_metadata = res?; +/// context = scan_metadata.visit_scan_files( /// context, /// my_callback, /// )?; /// } /// ``` -pub fn visit_scan_files( - data: &dyn EngineData, - selection_vector: &[bool], - transforms: &[Option], - context: T, - callback: ScanCallback, -) -> DeltaResult { - let mut visitor = ScanFileVisitor { - callback, - selection_vector, - transforms, - context, - }; - visitor.visit_rows_of(data)?; - Ok(visitor.context) +impl ScanMetadata { + pub fn visit_scan_files(&self, context: T, callback: ScanCallback) -> DeltaResult { + let mut visitor = ScanFileVisitor { + callback, + selection_vector: &self.scan_files.selection_vector, + transforms: &self.scan_file_transforms, + context, + }; + visitor.visit_rows_of(self.scan_files.data.as_ref())?; + Ok(visitor.context) + } } - // add some visitor magic for engines struct ScanFileVisitor<'a, T> { callback: ScanCallback, @@ -243,6 +238,7 @@ impl RowVisitor for ScanFileVisitor<'_, T> { mod tests { use std::collections::HashMap; + use crate::actions::get_log_schema; use crate::scan::test_utils::{add_batch_simple, run_with_validate_callback}; use crate::ExpressionRef; @@ -279,10 +275,10 @@ mod tests { } #[test] - fn test_simple_visit_scan_data() { + fn test_simple_visit_scan_metadata() { let context = TestContext { id: 2 }; run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], diff --git a/kernel/src/schema/mod.rs b/kernel/src/schema/mod.rs index 6086a7031..3a5648b57 100644 --- a/kernel/src/schema/mod.rs +++ 
b/kernel/src/schema/mod.rs @@ -22,7 +22,7 @@ pub type SchemaRef = Arc; #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] #[serde(untagged)] pub enum MetadataValue { - Number(i32), + Number(i64), String(String), Boolean(bool), // The [PROTOCOL](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#struct-field) states @@ -32,8 +32,8 @@ pub enum MetadataValue { Other(serde_json::Value), } -impl std::fmt::Display for MetadataValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl Display for MetadataValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { MetadataValue::Number(n) => write!(f, "{n}"), MetadataValue::String(s) => write!(f, "{s}"), @@ -61,8 +61,8 @@ impl From<&str> for MetadataValue { } } -impl From for MetadataValue { - fn from(value: i32) -> Self { +impl From for MetadataValue { + fn from(value: i64) -> Self { Self::Number(value) } } @@ -226,6 +226,11 @@ impl StructField { .unwrap() .into_owned() } + + fn has_invariants(&self) -> bool { + self.metadata + .contains_key(ColumnMetadataKey::Invariants.as_ref()) + } } /// A struct is used to represent both the top-level schema of the table @@ -286,6 +291,16 @@ impl StructType { self.fields.values() } + pub(crate) fn fields_len(&self) -> usize { + // O(1) for indexmap + self.fields.len() + } + + // Checks if the `StructType` contains a field with the specified name. + pub(crate) fn contains(&self, name: impl AsRef) -> bool { + self.fields.contains_key(name.as_ref()) + } + /// Extracts the name and type of all leaf columns, in schema order. Caller should pass Some /// `own_name` if this schema is embedded in a larger struct (e.g. `add.*`) and None if the /// schema is a top-level result (e.g. `*`). @@ -300,6 +315,34 @@ impl StructType { } } +#[derive(Debug, Default)] +pub(crate) struct InvariantChecker { + has_invariants: bool, +} + +impl<'a> SchemaTransform<'a> for InvariantChecker { + fn transform_struct_field(&mut self, field: &'a StructField) -> Option> { + if field.has_invariants() { + self.has_invariants = true; + } else if !self.has_invariants { + let _ = self.recurse_into_struct_field(field); + } + Some(Cow::Borrowed(field)) + } +} + +impl InvariantChecker { + /// Checks if any column in the schema (including nested columns) has invariants defined. + /// + /// This traverses the entire schema to check for the presence of the "delta.invariants" + /// metadata key. 
+ pub(crate) fn has_invariants(schema: &Schema) -> bool { + let mut checker = InvariantChecker::default(); + let _ = checker.transform_struct(schema); + checker.has_invariants + } +} + /// Helper for RowVisitor implementations #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[derive(Clone, Default)] @@ -1034,16 +1077,22 @@ mod tests { "nullable": true, "metadata": { "delta.columnMapping.id": 4, - "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" + "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1", + "delta.identity.start": 2147483648 } } "#; + let field: StructField = serde_json::from_str(data).unwrap(); let col_id = field .get_config_value(&ColumnMetadataKey::ColumnMappingId) .unwrap(); + let id_start = field + .get_config_value(&ColumnMetadataKey::IdentityStart) + .unwrap(); assert!(matches!(col_id, MetadataValue::Number(num) if *num == 4)); + assert!(matches!(id_start, MetadataValue::Number(num) if *num == 2147483648i64)); assert_eq!( field.physical_name(), "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" @@ -1203,4 +1252,111 @@ mod tests { "[\"an\",\"array\"]" ); } + + #[test] + fn test_fields_len() { + let schema = StructType::new([]); + assert!(schema.fields_len() == 0); + let schema = StructType::new([ + StructField::nullable("a", DataType::LONG), + StructField::nullable("b", DataType::LONG), + StructField::nullable("c", DataType::LONG), + StructField::nullable("d", DataType::LONG), + ]); + assert_eq!(schema.fields_len(), 4); + let schema = StructType::new([ + StructField::nullable("b", DataType::LONG), + StructField::not_null("b", DataType::LONG), + StructField::nullable("c", DataType::LONG), + StructField::nullable("c", DataType::LONG), + ]); + assert_eq!(schema.fields_len(), 2); + } + + #[test] + fn test_has_invariants() { + // Schema with no invariants + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + ]); + assert!(!InvariantChecker::has_invariants(&schema)); + + // Schema with top-level invariant + let mut field = StructField::nullable("c", DataType::STRING); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("c > 0".to_string()), + ); + + let schema = StructType::new([StructField::nullable("a", DataType::STRING), field]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a struct + let nested_field = StructField::nullable( + "nested_c", + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + nested_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in an array of structs + let array_field = StructField::nullable( + "array_field", + ArrayType::new( + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + array_field, + ]); + 
assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a map value that's a struct + let map_field = StructField::nullable( + "map_field", + MapType::new( + DataType::STRING, + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + map_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + } } diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index f198b9080..8b0bc86fd 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -7,13 +7,13 @@ use tracing::{debug, warn}; use url::Url; use crate::actions::{Metadata, Protocol}; -use crate::log_segment::LogSegment; +use crate::log_segment::{self, LogSegment}; use crate::scan::ScanBuilder; -use crate::schema::Schema; +use crate::schema::{Schema, SchemaRef}; use crate::table_configuration::TableConfiguration; use crate::table_features::ColumnMappingMode; use crate::table_properties::TableProperties; -use crate::{DeltaResult, Engine, Error, FileSystemClient, Version}; +use crate::{DeltaResult, Engine, Error, StorageHandler, Version}; const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; // TODO expose methods for accessing the files of a table (with file pruning). @@ -21,6 +21,7 @@ const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; /// throughout time, `Snapshot`s represent a view of a table at a specific point in time; they /// have a defined schema (which may change over time for any given table), specific version, and /// frozen log segment. +#[derive(PartialEq, Eq)] pub struct Snapshot { log_segment: LogSegment, table_configuration: TableConfiguration, @@ -43,30 +44,188 @@ impl std::fmt::Debug for Snapshot { } impl Snapshot { + fn new(log_segment: LogSegment, table_configuration: TableConfiguration) -> Self { + Self { + log_segment, + table_configuration, + } + } + /// Create a new [`Snapshot`] instance for the given version. /// /// # Parameters /// /// - `table_root`: url pointing at the table root (where `_delta_log` folder is located) /// - `engine`: Implementation of [`Engine`] apis. - /// - `version`: target version of the [`Snapshot`] + /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest + /// version of the table. pub fn try_new( table_root: Url, engine: &dyn Engine, version: Option, ) -> DeltaResult { - let fs_client = engine.get_file_system_client(); + let storage = engine.storage_handler(); let log_root = table_root.join("_delta_log/")?; - let checkpoint_hint = read_last_checkpoint(fs_client.as_ref(), &log_root)?; + let checkpoint_hint = read_last_checkpoint(storage.as_ref(), &log_root)?; let log_segment = - LogSegment::for_snapshot(fs_client.as_ref(), log_root, checkpoint_hint, version)?; + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_hint, version)?; // try_new_from_log_segment will ensure the protocol is supported Self::try_new_from_log_segment(table_root, log_segment, engine) } + /// Create a new [`Snapshot`] instance from an existing [`Snapshot`]. This is useful when you + /// already have a [`Snapshot`] lying around and want to do the minimal work to 'update' the + /// snapshot to a later version. + /// + /// We implement a simple heuristic: + /// 1. 
if the new version == existing version, just return the existing snapshot + /// 2. if the new version < existing version, error: there is no optimization to do here + /// 3. list from (existing checkpoint version + 1) onward (or just existing snapshot version if + /// no checkpoint) + /// 4. a. if new checkpoint is found: just create a new snapshot from that checkpoint (and + /// commits after it) + /// b. if no new checkpoint is found: do lightweight P+M replay on the latest commits (after + /// ensuring we only retain commits > any checkpoints) + /// + /// # Parameters + /// + /// - `existing_snapshot`: reference to an existing [`Snapshot`] + /// - `engine`: Implementation of [`Engine`] apis. + /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest + /// version of the table. + pub fn try_new_from( + existing_snapshot: Arc, + engine: &dyn Engine, + version: impl Into>, + ) -> DeltaResult> { + let old_log_segment = &existing_snapshot.log_segment; + let old_version = existing_snapshot.version(); + let new_version = version.into(); + if let Some(new_version) = new_version { + if new_version == old_version { + // Re-requesting the same version + return Ok(existing_snapshot.clone()); + } + if new_version < old_version { + // Hint is too new: error since this is effectively an incorrect optimization + return Err(Error::Generic(format!( + "Requested snapshot version {} is older than snapshot hint version {}", + new_version, old_version + ))); + } + } + + let log_root = old_log_segment.log_root.clone(); + let storage = engine.storage_handler(); + + // Start listing just after the previous segment's checkpoint, if any + let listing_start = old_log_segment.checkpoint_version.unwrap_or(0) + 1; + + // Check for new commits + let (new_ascending_commit_files, checkpoint_parts) = + log_segment::list_log_files_with_version( + storage.as_ref(), + &log_root, + Some(listing_start), + new_version, + )?; + + // NB: we need to check both checkpoints and commits since we filter commits at and below + // the checkpoint version. Example: if we have a checkpoint + commit at version 1, the log + // listing above will only return the checkpoint and not the commit. 
+ if new_ascending_commit_files.is_empty() && checkpoint_parts.is_empty() { + match new_version { + Some(new_version) if new_version != old_version => { + // No new commits, but we are looking for a new version + return Err(Error::Generic(format!( + "Requested snapshot version {} is newer than the latest version {}", + new_version, old_version + ))); + } + _ => { + // No new commits, just return the same snapshot + return Ok(existing_snapshot.clone()); + } + } + } + + // create a log segment just from existing_checkpoint.version -> new_version + // OR could be from 1 -> new_version + let mut new_log_segment = LogSegment::try_new( + new_ascending_commit_files, + checkpoint_parts, + log_root.clone(), + new_version, + )?; + + let new_end_version = new_log_segment.end_version; + if new_end_version < old_version { + // we should never see a new log segment with a version < the existing snapshot + // version, that would mean a commit was incorrectly deleted from the log + return Err(Error::Generic(format!( + "Unexpected state: The newest version in the log {} is older than the old version {}", + new_end_version, old_version))); + } + if new_end_version == old_version { + // No new commits, just return the same snapshot + return Ok(existing_snapshot.clone()); + } + + if new_log_segment.checkpoint_version.is_some() { + // we have a checkpoint in the new LogSegment, just construct a new snapshot from that + let snapshot = Self::try_new_from_log_segment( + existing_snapshot.table_root().clone(), + new_log_segment, + engine, + ); + return Ok(Arc::new(snapshot?)); + } + + // after this point, we incrementally update the snapshot with the new log segment. + // first we remove the 'overlap' in commits, example: + // + // old logsegment checkpoint1-commit1-commit2-commit3 + // 1. new logsegment commit1-commit2-commit3 + // 2. new logsegment commit1-commit2-commit3-commit4 + // 3. new logsegment checkpoint2+commit2-commit3-commit4 + // + // retain does + // 1. new logsegment [empty] -> caught above + // 2. new logsegment [commit4] + // 3. new logsegment [checkpoint2-commit3] -> caught above + new_log_segment + .ascending_commit_files + .retain(|log_path| old_version < log_path.version); + + // we have new commits and no new checkpoint: we replay new commits for P+M and then + // create a new snapshot by combining LogSegments and building a new TableConfiguration + let (new_metadata, new_protocol) = new_log_segment.protocol_and_metadata(engine)?; + let table_configuration = TableConfiguration::try_new_from( + existing_snapshot.table_configuration(), + new_metadata, + new_protocol, + new_log_segment.end_version, + )?; + // NB: we must add the new log segment to the existing snapshot's log segment + let mut ascending_commit_files = old_log_segment.ascending_commit_files.clone(); + ascending_commit_files.extend(new_log_segment.ascending_commit_files); + // we can pass in just the old checkpoint parts since by the time we reach this line, we + // know there are no checkpoints in the new log segment. + let combined_log_segment = LogSegment::try_new( + ascending_commit_files, + old_log_segment.checkpoint_parts.clone(), + log_root, + new_version, + )?; + Ok(Arc::new(Snapshot::new( + combined_log_segment, + table_configuration, + ))) + } + /// Create a new [`Snapshot`] instance. pub(crate) fn try_new_from_log_segment( location: Url, @@ -97,18 +256,21 @@ impl Snapshot { self.table_configuration().version() } - /// Table [`Schema`] at this `Snapshot`s version. 
- pub fn schema(&self) -> &Schema { + /// Table [`type@Schema`] at this `Snapshot`s version. + pub fn schema(&self) -> SchemaRef { self.table_configuration.schema() } /// Table [`Metadata`] at this `Snapshot`s version. - pub fn metadata(&self) -> &Metadata { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn metadata(&self) -> &Metadata { self.table_configuration.metadata() } /// Table [`Protocol`] at this `Snapshot`s version. - pub fn protocol(&self) -> &Protocol { + #[allow(dead_code)] + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn protocol(&self) -> &Protocol { self.table_configuration.protocol() } @@ -139,11 +301,12 @@ impl Snapshot { } } +// Note: Schema can not be derived because the checkpoint schema is only known at runtime. #[derive(Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] -struct CheckpointMetadata { +struct LastCheckpointHint { /// The version of the table when the last checkpoint was made. #[allow(unreachable_pub)] // used by acceptance tests (TODO make an fn accessor?) pub version: Version, @@ -170,11 +333,11 @@ struct CheckpointMetadata { /// /// TODO: java kernel retries three times before failing, should we do the same? fn read_last_checkpoint( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: &Url, -) -> DeltaResult> { +) -> DeltaResult> { let file_path = log_root.join(LAST_CHECKPOINT_FILE_NAME)?; - match fs_client + match storage .read_files(vec![(file_path, None)]) .and_then(|mut data| data.next().expect("read_files should return one file")) { @@ -197,12 +360,20 @@ mod tests { use object_store::memory::InMemory; use object_store::path::Path; use object_store::ObjectStore; + use serde_json::json; + + use crate::arrow::array::StringArray; + use crate::arrow::record_batch::RecordBatch; + use crate::parquet::arrow::ArrowWriter; + use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::executor::tokio::TokioBackgroundExecutor; - use crate::engine::default::filesystem::ObjectStoreFileSystemClient; + use crate::engine::default::filesystem::ObjectStoreStorageHandler; + use crate::engine::default::DefaultEngine; use crate::engine::sync::SyncEngine; use crate::path::ParsedLogPath; - use crate::schema::StructType; + use crate::utils::test_utils::string_array_to_engine_data; + use test_utils::{add_commit, delta_path_for_version}; #[test] fn test_snapshot_read_metadata() { @@ -218,8 +389,8 @@ mod tests { assert_eq!(snapshot.protocol(), &expected); let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#; - let expected: StructType = serde_json::from_str(schema_string).unwrap(); - assert_eq!(snapshot.schema(), &expected); + let expected: SchemaRef = serde_json::from_str(schema_string).unwrap(); + assert_eq!(snapshot.schema(), expected); } #[test] @@ -236,8 +407,213 @@ mod tests { assert_eq!(snapshot.protocol(), &expected); let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#; - let expected: StructType = serde_json::from_str(schema_string).unwrap(); - assert_eq!(snapshot.schema(), &expected); + let expected: SchemaRef = serde_json::from_str(schema_string).unwrap(); + assert_eq!(snapshot.schema(), expected); + } + + // interesting cases for testing Snapshot::new_from: 
+    // 1. new version < existing version
+    // 2. new version == existing version
+    // 3. new version > existing version AND
+    //    a. log segment hasn't changed
+    //    b. log segment for old..=new version has a checkpoint (with new protocol/metadata)
+    //    c. log segment for old..=new version has no checkpoint
+    //       i. commits have (new protocol, new metadata)
+    //       ii. commits have (new protocol, no metadata)
+    //       iii. commits have (no protocol, new metadata)
+    //       iv. commits have (no protocol, no metadata)
+    #[tokio::test]
+    async fn test_snapshot_new_from() -> DeltaResult<()> {
+        let path =
+            std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap();
+        let url = url::Url::from_directory_path(path).unwrap();
+
+        let engine = SyncEngine::new();
+        let old_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(1)).unwrap());
+        // 1. new version < existing version: error
+        let snapshot_res = Snapshot::try_new_from(old_snapshot.clone(), &engine, Some(0));
+        assert!(matches!(
+            snapshot_res,
+            Err(Error::Generic(msg)) if msg == "Requested snapshot version 0 is older than snapshot hint version 1"
+        ));
+
+        // 2. new version == existing version
+        let snapshot = Snapshot::try_new_from(old_snapshot.clone(), &engine, Some(1)).unwrap();
+        let expected = old_snapshot.clone();
+        assert_eq!(snapshot, expected);
+
+        // tests Snapshot::new_from by:
+        // 1. creating a snapshot with the new API for commits 0..=1 (based on old snapshot at 0)
+        // 2. comparing with a snapshot created directly at version 1
+        //
+        // the commits tested are:
+        // - commit 0 -> base snapshot at this version
+        // - commit 1 -> final snapshot at this version
+        //
+        // in each test we will modify version 1 to test different scenarios
+        fn test_new_from(store: Arc<InMemory>) -> DeltaResult<()> {
+            let url = Url::parse("memory:///")?;
+            let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new()));
+            let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+            let snapshot = Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(1))?;
+            let expected = Snapshot::try_new(url.clone(), &engine, Some(1))?;
+            assert_eq!(snapshot, expected.into());
+            Ok(())
+        }
+
+        // TODO: unify this and lots of stuff in LogSegment tests and test_utils
+        async fn commit(store: &InMemory, version: Version, commit: Vec<serde_json::Value>) {
+            let commit_data = commit
+                .iter()
+                .map(ToString::to_string)
+                .collect::<Vec<_>>()
+                .join("\n");
+            add_commit(store, version, commit_data).await.unwrap();
+        }
+
+        // for (3) we will just engineer custom log files
+        let store = Arc::new(InMemory::new());
+        // everything will have a starting 0 commit with commitInfo, protocol, metadata
+        let commit0 = vec![
+            json!({
+                "commitInfo": {
+                    "timestamp": 1587968586154i64,
+                    "operation": "WRITE",
+                    "operationParameters": {"mode":"ErrorIfExists","partitionBy":"[]"},
+                    "isBlindAppend":true
+                }
+            }),
+            json!({
+                "protocol": {
+                    "minReaderVersion": 1,
+                    "minWriterVersion": 2
+                }
+            }),
+            json!({
+                "metaData": {
+                    "id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9",
+                    "format": {
+                        "provider": "parquet",
+                        "options": {}
+                    },
+                    "schemaString": "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}",
+                    "partitionColumns": [],
+                    "configuration": {},
+                    "createdTime": 1587968585495i64
+                }
+            }),
+        ];
+        commit(store.as_ref(), 0, commit0.clone()).await;
+        // 3. new version > existing version
+        // a. no new log segment
+        let url = Url::parse("memory:///")?;
+        let engine = DefaultEngine::new(
+            Arc::new(store.fork()),
+            Arc::new(TokioBackgroundExecutor::new()),
+        );
+        let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+        let snapshot = Snapshot::try_new_from(base_snapshot.clone(), &engine, None)?;
+        let expected = Snapshot::try_new(url.clone(), &engine, Some(0))?;
+        assert_eq!(snapshot, expected.into());
+        // version exceeds latest version of the table = err
+        assert!(matches!(
+            Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(1)),
+            Err(Error::Generic(msg)) if msg == "Requested snapshot version 1 is newer than the latest version 0"
+        ));
+
+        // b. log segment for old..=new version has a checkpoint (with new protocol/metadata)
+        let store_3a = store.fork();
+        let mut checkpoint1 = commit0.clone();
+        commit(&store_3a, 1, commit0.clone()).await;
+        checkpoint1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        checkpoint1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+
+        let handler = engine.json_handler();
+        let json_strings: StringArray = checkpoint1
+            .into_iter()
+            .map(|json| json.to_string())
+            .collect::<Vec<_>>()
+            .into();
+        let parsed = handler
+            .parse_json(
+                string_array_to_engine_data(json_strings),
+                crate::actions::get_log_schema().clone(),
+            )
+            .unwrap();
+        let checkpoint = ArrowEngineData::try_from_engine_data(parsed).unwrap();
+        let checkpoint: RecordBatch = checkpoint.into();
+
+        // Write the record batch to a Parquet file
+        let mut buffer = vec![];
+        let mut writer = ArrowWriter::try_new(&mut buffer, checkpoint.schema(), None)?;
+        writer.write(&checkpoint)?;
+        writer.close()?;
+
+        store_3a
+            .put(
+                &delta_path_for_version(1, "checkpoint.parquet"),
+                buffer.into(),
+            )
+            .await
+            .unwrap();
+        test_new_from(store_3a.into())?;
+
+        // c. log segment for old..=new version has no checkpoint
+        // i. commits have (new protocol, new metadata)
+        let store_3c_i = Arc::new(store.fork());
+        let mut commit1 = commit0.clone();
+        commit1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        commit1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+        commit(store_3c_i.as_ref(), 1, commit1).await;
+        test_new_from(store_3c_i.clone())?;
+
+        // new commits AND request version > end of log
+        let url = Url::parse("memory:///")?;
+        let engine = DefaultEngine::new(store_3c_i, Arc::new(TokioBackgroundExecutor::new()));
+        let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+        assert!(matches!(
+            Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(2)),
+            Err(Error::Generic(msg)) if msg == "LogSegment end version 1 not the same as the specified end version 2"
+        ));
+
+        // ii. commits have (new protocol, no metadata)
+        let store_3c_ii = store.fork();
+        let mut commit1 = commit0.clone();
+        commit1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        commit1.remove(2); // remove metadata
+        commit(&store_3c_ii, 1, commit1).await;
+        test_new_from(store_3c_ii.into())?;
+
+        // iii. commits have (no protocol, new metadata)
+        let store_3c_iii = store.fork();
+        let mut commit1 = commit0.clone();
+        commit1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+        commit1.remove(1); // remove protocol
+        commit(&store_3c_iii, 1, commit1).await;
+        test_new_from(store_3c_iii.into())?;
+
+        // iv.
commits have (no protocol, no metadata) + let store_3c_iv = store.fork(); + let commit1 = vec![commit0[0].clone()]; + commit(&store_3c_iv, 1, commit1).await; + test_new_from(store_3c_iv.into())?; + + Ok(()) } #[test] @@ -249,14 +625,12 @@ mod tests { let url = url::Url::from_directory_path(path).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); - let cp = read_last_checkpoint(&client, &url).unwrap(); + let cp = read_last_checkpoint(&storage, &url).unwrap(); assert!(cp.is_none()) } @@ -288,16 +662,15 @@ mod tests { .expect("put _last_checkpoint"); }); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); let url = Url::parse("memory:///valid/").expect("valid url"); - let valid = read_last_checkpoint(&client, &url).expect("read last checkpoint"); + let valid = read_last_checkpoint(&storage, &url).expect("read last checkpoint"); let url = Url::parse("memory:///invalid/").expect("valid url"); - let invalid = read_last_checkpoint(&client, &url).expect("read last checkpoint"); + let invalid = read_last_checkpoint(&storage, &url).expect("read last checkpoint"); assert!(valid.is_some()); assert!(invalid.is_none()) } diff --git a/kernel/src/table_changes/log_replay.rs b/kernel/src/table_changes/log_replay.rs index 89951a39b..20fc11c6e 100644 --- a/kernel/src/table_changes/log_replay.rs +++ b/kernel/src/table_changes/log_replay.rs @@ -27,21 +27,21 @@ use itertools::Itertools; #[cfg(test)] mod tests; -/// Scan data for a Change Data Feed query. This holds metadata that is needed to read data rows. -pub(crate) struct TableChangesScanData { +/// Scan metadata for a Change Data Feed query. This holds metadata that's needed to read data rows. +pub(crate) struct TableChangesScanMetadata { /// Engine data with the schema defined in [`scan_row_schema`] /// /// Note: The schema of the engine data will be updated in the future to include columns /// used by Change Data Feed. - pub(crate) scan_data: Box, - /// The selection vector used to filter the `scan_data`. + pub(crate) scan_metadata: Box, + /// The selection vector used to filter the `scan_metadata`. pub(crate) selection_vector: Vec, /// A map from a remove action's path to its deletion vector pub(crate) remove_dvs: Arc>, } -/// Given an iterator of [`ParsedLogPath`] returns an iterator of [`TableChangesScanData`]. -/// Each row that is selected in the returned `TableChangesScanData.scan_data` (according +/// Given an iterator of [`ParsedLogPath`] returns an iterator of [`TableChangesScanMetadata`]. +/// Each row that is selected in the returned `TableChangesScanMetadata.scan_metadata` (according /// to the `selection_vector` field) _must_ be processed to complete the scan. Non-selected /// rows _must_ be ignored. 
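// A minimal sketch (not part of this diff) of the selection-vector contract described above:
// only selected rows are processed, non-selected rows are ignored. Field names follow
// `TableChangesScanMetadata`; `handle_selected_row` is a hypothetical callback.
fn process_scan_metadata(
    metadata: &TableChangesScanMetadata,
    handle_selected_row: &mut dyn FnMut(usize),
) {
    for (row_idx, selected) in metadata.selection_vector.iter().enumerate() {
        if *selected {
            // Selected rows _must_ be processed to complete the scan.
            handle_selected_row(row_idx);
        }
        // Non-selected rows _must_ be ignored.
    }
}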
/// @@ -52,7 +52,7 @@ pub(crate) fn table_changes_action_iter( commit_files: impl IntoIterator, table_schema: SchemaRef, physical_predicate: Option<(ExpressionRef, SchemaRef)>, -) -> DeltaResult>> { +) -> DeltaResult>> { let filter = DataSkippingFilter::new(engine.as_ref(), physical_predicate).map(Arc::new); let result = commit_files .into_iter() @@ -65,8 +65,9 @@ pub(crate) fn table_changes_action_iter( Ok(result) } -/// Processes a single commit file from the log to generate an iterator of [`TableChangesScanData`]. -/// The scanner operates in two phases that _must_ be performed in the following order: +/// Processes a single commit file from the log to generate an iterator of +/// [`TableChangesScanMetadata`]. The scanner operates in two phases that _must_ be performed in the +/// following order: /// 1. Prepare phase [`LogReplayScanner::try_new`]: This iterates over every action in the commit. /// In this phase, we do the following: /// - Determine if there exist any `cdc` actions. We determine this in the first phase because @@ -100,7 +101,7 @@ pub(crate) fn table_changes_action_iter( /// See https://github.com/delta-io/delta-kernel-rs/issues/559 /// /// 2. Scan file generation phase [`LogReplayScanner::into_scan_batches`]: This iterates over every -/// action in the commit, and generates [`TableChangesScanData`]. It does so by transforming the +/// action in the commit, and generates [`TableChangesScanMetadata`]. It does so by transforming the /// actions using [`add_transform_expr`], and generating selection vectors with the following rules: /// - If a `cdc` action was found in the prepare phase, only `cdc` actions are selected /// - Otherwise, select `add` and `remove` actions. Note that only `remove` actions that do not @@ -125,7 +126,7 @@ struct LogReplayScanner { // generated by in-commit timestamps, that timestamp will be used instead. // // Note: This will be used once an expression is introduced to transform the engine data in - // [`TableChangesScanData`] + // [`TableChangesScanMetadata`] timestamp: i64, } @@ -154,7 +155,7 @@ impl LogReplayScanner { // As a result, we would read the file path for the remove action, which is unnecessary because // all of the rows will be filtered by the predicate. Instead, we wait until deletion // vectors are resolved so that we can skip both actions in the pair. - let action_iter = engine.get_json_handler().read_json_files( + let action_iter = engine.json_handler().read_json_files( &[commit_file.location.clone()], visitor_schema, None, // not safe to apply data skipping yet @@ -208,14 +209,14 @@ impl LogReplayScanner { remove_dvs, }) } - /// Generates an iterator of [`TableChangesScanData`] by iterating over each action of the + /// Generates an iterator of [`TableChangesScanMetadata`] by iterating over each action of the /// commit, generating a selection vector, and transforming the engine data. This performs /// phase 2 of [`LogReplayScanner`]. 
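// A short sketch (not part of this diff) of how the two phases described above compose for a
// single commit file, mirroring what `table_changes_action_iter` does per commit. The exact
// argument shape of `try_new` is an assumption; the types follow the surrounding signatures.
fn replay_one_commit(
    engine: Arc<dyn Engine>,
    commit_file: ParsedLogPath,
    filter: Option<Arc<DataSkippingFilter>>,
) -> DeltaResult<impl Iterator<Item = DeltaResult<TableChangesScanMetadata>>> {
    // Phase 1: replay the commit once to detect `cdc` actions and collect remove-action
    // deletion vectors.
    let scanner = LogReplayScanner::try_new(engine.as_ref(), commit_file, &filter)?;
    // Phase 2: replay again, emitting filtered scan metadata batches.
    scanner.into_scan_batches(engine, filter)
}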
fn into_scan_batches( self, engine: Arc, filter: Option>, - ) -> DeltaResult>> { + ) -> DeltaResult>> { let Self { has_cdc_action, remove_dvs, @@ -226,16 +227,15 @@ impl LogReplayScanner { let remove_dvs = Arc::new(remove_dvs); let schema = FileActionSelectionVisitor::schema(); - let action_iter = engine.get_json_handler().read_json_files( - &[commit_file.location.clone()], - schema, - None, - )?; + let action_iter = + engine + .json_handler() + .read_json_files(&[commit_file.location.clone()], schema, None)?; let commit_version = commit_file .version .try_into() .map_err(|_| Error::generic("Failed to convert commit version to i64"))?; - let evaluator = engine.get_expression_handler().get_evaluator( + let evaluator = engine.evaluation_handler().new_expression_evaluator( get_log_add_schema().clone(), cdf_scan_row_expression(timestamp, commit_version), cdf_scan_row_schema().into(), @@ -255,9 +255,9 @@ impl LogReplayScanner { let mut visitor = FileActionSelectionVisitor::new(&remove_dvs, selection_vector, has_cdc_action); visitor.visit_rows_of(actions.as_ref())?; - let scan_data = evaluator.evaluate(actions.as_ref())?; - Ok(TableChangesScanData { - scan_data, + let scan_metadata = evaluator.evaluate(actions.as_ref())?; + Ok(TableChangesScanMetadata { + scan_metadata, selection_vector: visitor.selection_vector, remove_dvs: remove_dvs.clone(), }) diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index 35c4a99f8..babdde516 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -1,5 +1,5 @@ use super::table_changes_action_iter; -use super::TableChangesScanData; +use super::TableChangesScanMetadata; use crate::actions::deletion_vector::DeletionVectorDescriptor; use crate::actions::{Add, Cdc, Metadata, Protocol, Remove}; use crate::engine::sync::SyncEngine; @@ -11,7 +11,7 @@ use crate::scan::state::DvInfo; use crate::scan::PhysicalPredicate; use crate::schema::{DataType, StructField, StructType}; use crate::table_changes::log_replay::LogReplayScanner; -use crate::table_features::ReaderFeatures; +use crate::table_features::ReaderFeature; use crate::utils::test_utils::{Action, LocalMockTable}; use crate::Expression; use crate::{DeltaResult, Engine, Error, Version}; @@ -37,7 +37,7 @@ fn get_segment( let table_root = url::Url::from_directory_path(path).unwrap(); let log_root = table_root.join("_delta_log/")?; let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), + engine.storage_handler().as_ref(), log_root, start_version, end_version, @@ -45,8 +45,8 @@ fn get_segment( Ok(log_segment.ascending_commit_files) } -fn result_to_sv(iter: impl Iterator>) -> Vec { - iter.map_ok(|scan_data| scan_data.selection_vector.into_iter()) +fn result_to_sv(iter: impl Iterator>) -> Vec { + iter.map_ok(|scan_metadata| scan_metadata.selection_vector.into_iter()) .flatten_ok() .try_collect() .unwrap() @@ -75,8 +75,8 @@ async fn metadata_protocol() { Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([ReaderFeatures::ColumnMapping]), + Some([ReaderFeature::DeletionVectors]), + Some([ReaderFeature::ColumnMapping]), ) .unwrap(), ), @@ -129,10 +129,7 @@ async fn unsupported_reader_feature() { Protocol::try_new( 3, 7, - Some([ - ReaderFeatures::DeletionVectors, - ReaderFeatures::ColumnMapping, - ]), + Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), Some([""; 0]), ) .unwrap(), @@ -297,10 +294,10 @@ async fn add_remove() { let sv = 
table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -347,10 +344,10 @@ async fn filter_data_change() { let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -393,10 +390,10 @@ async fn cdc_selection() { let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -459,10 +456,10 @@ async fn dv() { .into(); let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, expected_remove_dvs); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, expected_remove_dvs); + scan_metadata.selection_vector }) .collect_vec(); @@ -536,9 +533,9 @@ async fn data_skipping_filter() { let sv = table_changes_action_iter(engine, commits, logical_schema.into(), predicate) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + scan_metadata.selection_vector }) .collect_vec(); diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index e65b0ae53..86d0f99af 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -31,7 +31,6 @@ //! let table_change_batches = table_changes_scan.execute(engine.clone())?; //! # Ok::<(), Error>(()) //! 
``` -use std::collections::HashSet; use std::sync::{Arc, LazyLock}; use scan::TableChangesScanBuilder; @@ -42,7 +41,7 @@ use crate::log_segment::LogSegment; use crate::path::AsUrl; use crate::schema::{DataType, Schema, StructField, StructType}; use crate::snapshot::Snapshot; -use crate::table_features::{ColumnMappingMode, ReaderFeatures}; +use crate::table_features::{ColumnMappingMode, ReaderFeature}; use crate::table_properties::TableProperties; use crate::utils::require; use crate::{DeltaResult, Engine, Error, Version}; @@ -111,7 +110,7 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { pub struct TableChanges { pub(crate) log_segment: LogSegment, table_root: Url, - end_snapshot: Snapshot, + end_snapshot: Arc, start_version: Version, schema: Schema, } @@ -140,7 +139,7 @@ impl TableChanges { ) -> DeltaResult { let log_root = table_root.join("_delta_log/")?; let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), + engine.storage_handler().as_ref(), log_root, start_version, end_version, @@ -149,9 +148,12 @@ impl TableChanges { // Both snapshots ensure that reading is supported at the start and end version using // `ensure_read_supported`. Note that we must still verify that reading is // supported for every protocol action in the CDF range. - let start_snapshot = - Snapshot::try_new(table_root.as_url().clone(), engine, Some(start_version))?; - let end_snapshot = Snapshot::try_new(table_root.as_url().clone(), engine, end_version)?; + let start_snapshot = Arc::new(Snapshot::try_new( + table_root.as_url().clone(), + engine, + Some(start_version), + )?); + let end_snapshot = Snapshot::try_new_from(start_snapshot.clone(), engine, end_version)?; // Verify CDF is enabled at the beginning and end of the interval using // [`check_cdf_table_properties`] to fail early. This also ensures that column mapping is @@ -252,8 +254,8 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . /// See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors])); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); match &protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if protocol.min_reader_version() == 3 => { diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 4265e4805..b9bed794d 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -12,10 +12,10 @@ use crate::scan::{ColumnType, PhysicalPredicate, ScanResult}; use crate::schema::{SchemaRef, StructType}; use crate::{DeltaResult, Engine, ExpressionRef, FileMeta}; -use super::log_replay::{table_changes_action_iter, TableChangesScanData}; +use super::log_replay::{table_changes_action_iter, TableChangesScanMetadata}; use super::physical_to_logical::{physical_to_logical_expr, scan_file_physical_schema}; use super::resolve_dvs::{resolve_scan_file_dv, ResolvedCdfScanFile}; -use super::scan_file::scan_data_to_scan_file; +use super::scan_file::scan_metadata_to_scan_file; use super::{TableChanges, CDF_FIELDS}; /// The result of building a [`TableChanges`] scan over a table. 
This can be used to get the change @@ -177,15 +177,16 @@ impl TableChangesScanBuilder { } impl TableChangesScan { - /// Returns an iterator of [`TableChangesScanData`] necessary to read CDF. Each row + /// Returns an iterator of [`TableChangesScanMetadata`] necessary to read CDF. Each row /// represents an action in the delta log. These rows are filtered to yield only the actions - /// necessary to read CDF. Additionally, [`TableChangesScanData`] holds metadata on the - /// deletion vectors present in the commit. The engine data in each scan data is guaranteed - /// to belong to the same commit. Several [`TableChangesScanData`] may belong to the same commit. - fn scan_data( + /// necessary to read CDF. Additionally, [`TableChangesScanMetadata`] holds metadata on the + /// deletion vectors present in the commit. The engine data in each scan metadata is guaranteed + /// to belong to the same commit. Several [`TableChangesScanMetadata`] may belong to the same + /// commit. + fn scan_metadata( &self, engine: Arc, - ) -> DeltaResult>> { + ) -> DeltaResult>> { let commits = self .table_changes .log_segment @@ -197,7 +198,7 @@ impl TableChangesScan { PhysicalPredicate::Some(predicate, schema) => Some((predicate, schema)), PhysicalPredicate::None => None, }; - let schema = self.table_changes.end_snapshot.schema().clone().into(); + let schema = self.table_changes.end_snapshot.schema(); let it = table_changes_action_iter(engine, commits, schema, physical_predicate)?; Ok(Some(it).into_iter().flatten()) } @@ -238,8 +239,8 @@ impl TableChangesScan { &self, engine: Arc, ) -> DeltaResult>> { - let scan_data = self.scan_data(engine.clone())?; - let scan_files = scan_data_to_scan_file(scan_data); + let scan_metadata = self.scan_metadata(engine.clone())?; + let scan_files = scan_metadata_to_scan_file(scan_metadata); let global_scan_state = self.global_scan_state(); let table_root = self.table_changes.table_root().clone(); @@ -286,7 +287,7 @@ fn read_scan_file( physical_to_logical_expr(&scan_file, global_state.logical_schema.as_ref(), all_fields)?; let physical_schema = scan_file_physical_schema(&scan_file, global_state.physical_schema.as_ref()); - let phys_to_logical_eval = engine.get_expression_handler().get_evaluator( + let phys_to_logical_eval = engine.evaluation_handler().new_expression_evaluator( physical_schema.clone(), physical_to_logical_expr, global_state.logical_schema.clone().into(), @@ -301,7 +302,7 @@ fn read_scan_file( size: 0, location, }; - let read_result_iter = engine.get_parquet_handler().read_parquet_files( + let read_result_iter = engine.parquet_handler().read_parquet_files( &[file], physical_schema, physical_predicate, diff --git a/kernel/src/table_changes/scan_file.rs b/kernel/src/table_changes/scan_file.rs index f428e09df..0b7406856 100644 --- a/kernel/src/table_changes/scan_file.rs +++ b/kernel/src/table_changes/scan_file.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use std::collections::HashMap; use std::sync::{Arc, LazyLock}; -use super::log_replay::TableChangesScanData; +use super::log_replay::TableChangesScanMetadata; use crate::actions::visitors::visit_deletion_vector_at; use crate::engine_data::{GetData, TypedGetData}; use crate::expressions::{column_expr, Expression}; @@ -47,17 +47,17 @@ pub(crate) struct CdfScanFile { pub(crate) type CdfScanCallback = fn(context: &mut T, scan_file: CdfScanFile); -/// Transforms an iterator of [`TableChangesScanData`] into an iterator of +/// Transforms an iterator of [`TableChangesScanMetadata`] into an iterator of /// [`CdfScanFile`] by 
visiting the engine data. -pub(crate) fn scan_data_to_scan_file( - scan_data: impl Iterator>, +pub(crate) fn scan_metadata_to_scan_file( + scan_metadata: impl Iterator>, ) -> impl Iterator> { - scan_data - .map(|scan_data| -> DeltaResult<_> { - let scan_data = scan_data?; + scan_metadata + .map(|scan_metadata| -> DeltaResult<_> { + let scan_metadata = scan_metadata?; let callback: CdfScanCallback> = |context, scan_file| context.push(scan_file); - Ok(visit_cdf_scan_files(&scan_data, vec![], callback)?.into_iter()) + Ok(visit_cdf_scan_files(&scan_metadata, vec![], callback)?.into_iter()) }) // Iterator-Result-Iterator .flatten_ok() // Iterator-Result } @@ -78,7 +78,7 @@ pub(crate) fn scan_data_to_scan_file( /// ## Example /// ```ignore /// let mut context = [my context]; -/// for res in scan_data { // scan data table_changes_scan.scan_data() +/// for res in scan_metadata { // scan metadata table_changes_scan.scan_metadata() /// let (data, vector, remove_dv) = res?; /// context = delta_kernel::table_changes::scan_file::visit_cdf_scan_files( /// data.as_ref(), @@ -89,18 +89,18 @@ pub(crate) fn scan_data_to_scan_file( /// } /// ``` pub(crate) fn visit_cdf_scan_files( - scan_data: &TableChangesScanData, + scan_metadata: &TableChangesScanMetadata, context: T, callback: CdfScanCallback, ) -> DeltaResult { let mut visitor = CdfScanFileVisitor { callback, context, - selection_vector: &scan_data.selection_vector, - remove_dvs: scan_data.remove_dvs.as_ref(), + selection_vector: &scan_metadata.selection_vector, + remove_dvs: scan_metadata.remove_dvs.as_ref(), }; - visitor.visit_rows_of(scan_data.scan_data.as_ref())?; + visitor.visit_rows_of(scan_metadata.scan_metadata.as_ref())?; Ok(visitor.context) } @@ -172,7 +172,7 @@ impl RowVisitor for CdfScanFileVisitor<'_, T> { } } -/// Get the schema that scan rows (from [`TableChanges::scan_data`]) will be returned with. +/// Get the schema that scan rows (from [`TableChanges::scan_metadata`]) will be returned with. pub(crate) fn cdf_scan_row_schema() -> SchemaRef { static CDF_SCAN_ROW_SCHEMA: LazyLock> = LazyLock::new(|| { let deletion_vector = StructType::new([ @@ -213,7 +213,7 @@ pub(crate) fn cdf_scan_row_schema() -> SchemaRef { } /// Expression to convert an action with `log_schema` into one with -/// [`cdf_scan_row_schema`]. This is the expression used to create [`TableChangesScanData`]. +/// [`cdf_scan_row_schema`]. This is the expression used to create [`TableChangesScanMetadata`]. 
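// A short sketch (not part of this diff) of how this expression pairs with
// `cdf_scan_row_schema`, following the evaluator construction used in `into_scan_batches`
// above; the timestamp and commit version are placeholders supplied by the caller.
fn make_cdf_scan_row_evaluator(engine: &dyn Engine, timestamp: i64, commit_version: i64) {
    let _evaluator = engine.evaluation_handler().new_expression_evaluator(
        get_log_add_schema().clone(),                       // input: log add schema
        cdf_scan_row_expression(timestamp, commit_version), // transform per commit
        cdf_scan_row_schema().into(),                       // output: cdf scan row schema
    );
    // Calling `_evaluator.evaluate(actions)` then yields engine data in `cdf_scan_row_schema`.
}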
pub(crate) fn cdf_scan_row_expression(commit_timestamp: i64, commit_number: i64) -> Expression { Expression::struct_from([ Expression::struct_from([ @@ -242,7 +242,7 @@ mod tests { use itertools::Itertools; - use super::{scan_data_to_scan_file, CdfScanFile, CdfScanFileType}; + use super::{scan_metadata_to_scan_file, CdfScanFile, CdfScanFileType}; use crate::actions::deletion_vector::DeletionVectorDescriptor; use crate::actions::{Add, Cdc, Remove}; use crate::engine::sync::SyncEngine; @@ -326,25 +326,23 @@ mod tests { let table_root = url::Url::from_directory_path(mock_table.table_root()).unwrap(); let log_root = table_root.join("_delta_log/").unwrap(); - let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), - log_root, - 0, - None, - ) - .unwrap(); + let log_segment = + LogSegment::for_table_changes(engine.storage_handler().as_ref(), log_root, 0, None) + .unwrap(); let table_schema = StructType::new([ StructField::nullable("id", DataType::INTEGER), StructField::nullable("value", DataType::STRING), ]); - let scan_data = table_changes_action_iter( + let scan_metadata = table_changes_action_iter( Arc::new(engine), log_segment.ascending_commit_files.clone(), table_schema.into(), None, ) .unwrap(); - let scan_files: Vec<_> = scan_data_to_scan_file(scan_data).try_collect().unwrap(); + let scan_files: Vec<_> = scan_metadata_to_scan_file(scan_metadata) + .try_collect() + .unwrap(); // Generate the expected [`CdfScanFile`] let timestamps = log_segment diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index 565546d52..a51a38ce7 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -8,19 +8,18 @@ //! [`TableProperties`]. //! //! [`Schema`]: crate::schema::Schema -use std::collections::HashSet; use std::sync::{Arc, LazyLock}; use url::Url; use crate::actions::{ensure_supported_features, Metadata, Protocol}; -use crate::schema::{Schema, SchemaRef}; +use crate::schema::{InvariantChecker, SchemaRef}; use crate::table_features::{ - column_mapping_mode, validate_schema_column_mapping, ColumnMappingMode, ReaderFeatures, - WriterFeatures, + column_mapping_mode, validate_schema_column_mapping, ColumnMappingMode, ReaderFeature, + WriterFeature, }; use crate::table_properties::TableProperties; -use crate::{DeltaResult, Version}; +use crate::{DeltaResult, Error, Version}; /// Holds all the configuration for a table at a specific version. This includes the supported /// reader and writer features, table properties, schema, version, and table root. This can be used @@ -33,7 +32,7 @@ use crate::{DeltaResult, Version}; /// `try_new` successfully returns `TableConfiguration`, it is also guaranteed that reading the /// table is supported. 
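// A minimal usage sketch (not part of this diff), mirroring the test module further below:
// validate a (Metadata, Protocol) pair once via `try_new`, then read derived state off the
// result. The function name and the printing are illustrative only.
fn inspect_table_config(metadata: Metadata, protocol: Protocol, table_root: Url) -> DeltaResult<()> {
    // `try_new` fails if the kernel cannot read this table, so success implies read support.
    let config = TableConfiguration::try_new(metadata, protocol, table_root, 0)?;
    println!(
        "version={} column_mapping={:?} schema={:?}",
        config.version(),
        config.column_mapping_mode(),
        config.schema(),
    );
    Ok(())
}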
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct TableConfiguration { metadata: Metadata, protocol: Protocol, @@ -88,57 +87,101 @@ impl TableConfiguration { version, }) } + + pub(crate) fn try_new_from( + table_configuration: &Self, + new_metadata: Option, + new_protocol: Option, + new_version: Version, + ) -> DeltaResult { + // simplest case: no new P/M, just return the existing table configuration with new version + if new_metadata.is_none() && new_protocol.is_none() { + return Ok(Self { + version: new_version, + ..table_configuration.clone() + }); + } + + // note that while we could pick apart the protocol/metadata updates and validate them + // individually, instead we just re-parse so that we can recycle the try_new validation + // (instead of duplicating it here). + Self::try_new( + new_metadata.unwrap_or_else(|| table_configuration.metadata.clone()), + new_protocol.unwrap_or_else(|| table_configuration.protocol.clone()), + table_configuration.table_root.clone(), + new_version, + ) + } + /// The [`Metadata`] for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn metadata(&self) -> &Metadata { &self.metadata } + /// The [`Protocol`] of this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn protocol(&self) -> &Protocol { &self.protocol } - /// The [`Schema`] of for this table at this version. + + /// The logical schema ([`SchemaRef`]) of this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn schema(&self) -> &Schema { - self.schema.as_ref() + pub(crate) fn schema(&self) -> SchemaRef { + self.schema.clone() } + /// The [`TableProperties`] of this table at this version. - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_properties(&self) -> &TableProperties { &self.table_properties } + /// The [`ColumnMappingMode`] for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn column_mapping_mode(&self) -> ColumnMappingMode { self.column_mapping_mode } + /// The [`Url`] of the table this [`TableConfiguration`] belongs to #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_root(&self) -> &Url { &self.table_root } + /// The [`Version`] which this [`TableConfiguration`] belongs to. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn version(&self) -> Version { self.version } + /// Returns `true` if the kernel supports writing to this table. This checks that the /// protocol's writer features are all supported. 
- #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn is_write_supported(&self) -> bool { - self.protocol.ensure_write_supported().is_ok() + pub(crate) fn ensure_write_supported(&self) -> DeltaResult<()> { + self.protocol.ensure_write_supported()?; + + // for now we don't allow invariants so although we support writer version 2 and the + // ColumnInvariant TableFeature we _must_ check here that they are not actually in use + if self.is_invariants_supported() + && InvariantChecker::has_invariants(self.schema().as_ref()) + { + return Err(Error::unsupported( + "Column invariants are not yet supported", + )); + } + + Ok(()) } + /// Returns `true` if kernel supports reading Change Data Feed on this table. /// See the documentation of [`TableChanges`] for more details. /// /// [`TableChanges`]: crate::table_changes::TableChanges #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn is_cdf_read_supported(&self) -> bool { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors])); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); let protocol_supported = match self.protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.protocol.min_reader_version() == 3 => { @@ -159,21 +202,22 @@ impl TableConfiguration { ); protocol_supported && cdf_enabled && column_mapping_disabled } + /// Returns `true` if deletion vectors is supported on this table. To support deletion vectors, /// a table must support reader version 3, writer version 7, and the deletionVectors feature in /// both the protocol's readerFeatures and writerFeatures. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { let read_supported = self .protocol() - .has_reader_feature(&ReaderFeatures::DeletionVectors) + .has_reader_feature(&ReaderFeature::DeletionVectors) && self.protocol.min_reader_version() == 3; let write_supported = self .protocol() - .has_writer_feature(&WriterFeatures::DeletionVectors) + .has_writer_feature(&WriterFeature::DeletionVectors) && self.protocol.min_writer_version() == 7; read_supported && write_supported } @@ -183,8 +227,8 @@ impl TableConfiguration { /// table property is set to `true`. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_enabled(&self) -> bool { self.is_deletion_vector_supported() && self @@ -192,6 +236,32 @@ impl TableConfiguration { .enable_deletion_vectors .unwrap_or(false) } + + /// Returns `true` if the table supports the appendOnly table feature. To support this feature: + /// - The table must have a writer version between 2 and 7 (inclusive) + /// - If the table is on writer version 7, it must have the [`WriterFeature::AppendOnly`] + /// writer feature. 
+ pub(crate) fn is_append_only_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeature::AppendOnly) => true, + version => (2..=6).contains(&version), + } + } + + #[allow(unused)] + pub(crate) fn is_append_only_enabled(&self) -> bool { + self.is_append_only_supported() && self.table_properties.append_only.unwrap_or(false) + } + + /// Returns `true` if the table supports the column invariant table feature. + pub(crate) fn is_invariants_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeature::Invariants) => true, + version => (2..=6).contains(&version), + } + } } #[cfg(test)] @@ -201,7 +271,8 @@ mod test { use url::Url; use crate::actions::{Metadata, Protocol}; - use crate::table_features::{ReaderFeatures, WriterFeatures}; + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_properties::TableProperties; use super::TableConfiguration; @@ -218,8 +289,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -244,8 +315,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -259,16 +330,10 @@ mod test { schema_string: r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#.to_string(), ..Default::default() }; - let protocol = Protocol::try_new( - 3, - 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([WriterFeatures::V2Checkpoint]), - ) - .unwrap(); + let protocol = Protocol::try_new(3, 7, Some(["unknown"]), Some(["unknown"])).unwrap(); let table_root = Url::try_from("file:///").unwrap(); TableConfiguration::try_new(metadata, protocol, table_root, 0) - .expect_err("V2 checkpoint is not supported in kernel"); + .expect_err("Unknown feature is not supported in kernel"); } #[test] fn dv_not_supported() { @@ -283,8 +348,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::TimestampWithoutTimezone]), - Some([WriterFeatures::TimestampWithoutTimezone]), + Some([ReaderFeature::TimestampWithoutTimezone]), + Some([WriterFeature::TimestampWithoutTimezone]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -292,4 +357,78 @@ mod test { assert!(!table_config.is_deletion_vector_supported()); assert!(!table_config.is_deletion_vector_enabled()); } + + #[test] + fn test_try_new_from() { + let schema_string =r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#.to_string(); + let metadata = Metadata { + configuration: HashMap::from_iter([( + "delta.enableChangeDataFeed".to_string(), + "true".to_string(), + )]), + schema_string: schema_string.clone(), + ..Default::default() + }; + let protocol = Protocol::try_new( + 3, + 7, + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), + ) + .unwrap(); + let table_root = Url::try_from("file:///").unwrap(); + let table_config = TableConfiguration::try_new(metadata, protocol, table_root, 0).unwrap(); + + let new_metadata = Metadata { + 
configuration: HashMap::from_iter([ + ( + "delta.enableChangeDataFeed".to_string(), + "false".to_string(), + ), + ( + "delta.enableDeletionVectors".to_string(), + "true".to_string(), + ), + ]), + schema_string, + ..Default::default() + }; + let new_protocol = Protocol::try_new( + 3, + 7, + Some([ReaderFeature::DeletionVectors, ReaderFeature::V2Checkpoint]), + Some([ + WriterFeature::DeletionVectors, + WriterFeature::V2Checkpoint, + WriterFeature::AppendOnly, + ]), + ) + .unwrap(); + let new_version = 1; + let new_table_config = TableConfiguration::try_new_from( + &table_config, + Some(new_metadata.clone()), + Some(new_protocol.clone()), + new_version, + ) + .unwrap(); + + assert_eq!(new_table_config.version(), new_version); + assert_eq!(new_table_config.metadata(), &new_metadata); + assert_eq!(new_table_config.protocol(), &new_protocol); + assert_eq!(new_table_config.schema(), table_config.schema()); + assert_eq!( + new_table_config.table_properties(), + &TableProperties { + enable_change_data_feed: Some(false), + enable_deletion_vectors: Some(true), + ..Default::default() + } + ); + assert_eq!( + new_table_config.column_mapping_mode(), + table_config.column_mapping_mode() + ); + assert_eq!(new_table_config.table_root(), table_config.table_root()); + } } diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index 442f742e9..4bd48b8f9 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -1,5 +1,5 @@ //! Code to handle column mapping, including modes and schema transforms -use super::ReaderFeatures; +use super::ReaderFeature; use crate::actions::Protocol; use crate::schema::{ColumnName, DataType, MetadataValue, Schema, SchemaTransform, StructField}; use crate::table_properties::TableProperties; @@ -36,7 +36,7 @@ pub(crate) fn column_mapping_mode( // (but should be ignored) even when the feature is not supported. 
For details see // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#column-mapping (Some(mode), 2) => mode, - (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeatures::ColumnMapping) => mode, + (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeature::ColumnMapping) => mode, _ => ColumnMappingMode::None, } } @@ -201,7 +201,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::ColumnMapping]), + Some([ReaderFeature::ColumnMapping]), empty_features.clone(), ) .unwrap(); @@ -219,7 +219,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), empty_features.clone(), ) .unwrap(); @@ -237,10 +237,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ - ReaderFeatures::DeletionVectors, - ReaderFeatures::ColumnMapping, - ]), + Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), empty_features, ) .unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index ee27fc17e..761dc8402 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -1,8 +1,10 @@ -use std::collections::HashSet; use std::sync::LazyLock; use serde::{Deserialize, Serialize}; -use strum::{AsRefStr, Display as StrumDisplay, EnumString, VariantNames}; +use strum::{AsRefStr, Display as StrumDisplay, EnumCount, EnumString}; + +use crate::actions::schemas::ToDataType; +use crate::schema::DataType; pub(crate) use column_mapping::column_mapping_mode; pub use column_mapping::{validate_schema_column_mapping, ColumnMappingMode}; @@ -24,12 +26,12 @@ mod column_mapping; EnumString, StrumDisplay, AsRefStr, - VariantNames, + EnumCount, Hash, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] -pub enum ReaderFeatures { +pub enum ReaderFeature { /// Mapping of one column to another ColumnMapping, /// Deletion vectors for merge, update, delete @@ -48,6 +50,9 @@ pub enum ReaderFeatures { /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer /// protocol checks during VACUUM operations VacuumProtocolCheck, + #[serde(untagged)] + #[strum(default)] + Unknown(String), } /// Similar to reader features, writer features communicate capabilities that must be implemented @@ -65,12 +70,12 @@ pub enum ReaderFeatures { EnumString, StrumDisplay, AsRefStr, - VariantNames, + EnumCount, Hash, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] -pub enum WriterFeatures { +pub enum WriterFeature { /// Append Only Tables AppendOnly, /// Table invariants @@ -109,65 +114,122 @@ pub enum WriterFeatures { /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer /// protocol checks during VACUUM operations VacuumProtocolCheck, + #[serde(untagged)] + #[strum(default)] + Unknown(String), +} + +impl ToDataType for ReaderFeature { + fn to_data_type() -> DataType { + DataType::STRING + } +} + +impl ToDataType for WriterFeature { + fn to_data_type() -> DataType { + DataType::STRING + } } -impl From for String { - fn from(feature: ReaderFeatures) -> Self { - feature.to_string() +#[cfg(test)] // currently only used in tests +impl ReaderFeature { + pub(crate) fn unknown(s: impl ToString) -> Self { + ReaderFeature::Unknown(s.to_string()) } } -impl From for String { - fn from(feature: WriterFeatures) -> Self { - feature.to_string() +#[cfg(test)] // currently only used in tests +impl WriterFeature { + pub(crate) fn unknown(s: impl 
ToString) -> Self { + WriterFeature::Unknown(s.to_string()) } } -// we support everything except V2 checkpoints -pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| { - HashSet::from([ - ReaderFeatures::ColumnMapping, - ReaderFeatures::DeletionVectors, - ReaderFeatures::TimestampWithoutTimezone, - ReaderFeatures::TypeWidening, - ReaderFeatures::TypeWideningPreview, - ReaderFeatures::VacuumProtocolCheck, - ]) - }); - -// write support wip: no table features are supported yet -pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([])); +pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { + vec![ + ReaderFeature::ColumnMapping, + ReaderFeature::DeletionVectors, + ReaderFeature::TimestampWithoutTimezone, + ReaderFeature::TypeWidening, + ReaderFeature::TypeWideningPreview, + ReaderFeature::VacuumProtocolCheck, + ReaderFeature::V2Checkpoint, + ] +}); + +// note: we 'support' Invariants, but only insofar as we check that they are not present. +// we support writing to tables that have Invariants enabled but not used. similarly, we only +// support DeletionVectors in that we never write them (no DML). +pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { + vec![ + WriterFeature::AppendOnly, + WriterFeature::DeletionVectors, + WriterFeature::Invariants, + ] +}); #[cfg(test)] mod tests { use super::*; + #[test] + fn test_unknown_features() { + let mixed_reader = &[ + ReaderFeature::DeletionVectors, + ReaderFeature::unknown("cool_feature"), + ReaderFeature::ColumnMapping, + ]; + let mixed_writer = &[ + WriterFeature::DeletionVectors, + WriterFeature::unknown("cool_feature"), + WriterFeature::AppendOnly, + ]; + + let reader_string = serde_json::to_string(mixed_reader).unwrap(); + let writer_string = serde_json::to_string(mixed_writer).unwrap(); + + assert_eq!( + &reader_string, + "[\"deletionVectors\",\"cool_feature\",\"columnMapping\"]" + ); + assert_eq!( + &writer_string, + "[\"deletionVectors\",\"cool_feature\",\"appendOnly\"]" + ); + + let typed_reader: Vec = serde_json::from_str(&reader_string).unwrap(); + let typed_writer: Vec = serde_json::from_str(&writer_string).unwrap(); + + assert_eq!(typed_reader.len(), 3); + assert_eq!(&typed_reader, mixed_reader); + assert_eq!(typed_writer.len(), 3); + assert_eq!(&typed_writer, mixed_writer); + } + #[test] fn test_roundtrip_reader_features() { let cases = [ - (ReaderFeatures::ColumnMapping, "columnMapping"), - (ReaderFeatures::DeletionVectors, "deletionVectors"), - (ReaderFeatures::TimestampWithoutTimezone, "timestampNtz"), - (ReaderFeatures::TypeWidening, "typeWidening"), - (ReaderFeatures::TypeWideningPreview, "typeWidening-preview"), - (ReaderFeatures::V2Checkpoint, "v2Checkpoint"), - (ReaderFeatures::VacuumProtocolCheck, "vacuumProtocolCheck"), + (ReaderFeature::ColumnMapping, "columnMapping"), + (ReaderFeature::DeletionVectors, "deletionVectors"), + (ReaderFeature::TimestampWithoutTimezone, "timestampNtz"), + (ReaderFeature::TypeWidening, "typeWidening"), + (ReaderFeature::TypeWideningPreview, "typeWidening-preview"), + (ReaderFeature::V2Checkpoint, "v2Checkpoint"), + (ReaderFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (ReaderFeature::unknown("something"), "something"), ]; - assert_eq!(ReaderFeatures::VARIANTS.len(), cases.len()); - - for ((feature, expected), name) in cases.into_iter().zip(ReaderFeatures::VARIANTS) { - assert_eq!(*name, expected); + assert_eq!(ReaderFeature::COUNT, cases.len()); + for (feature, expected) in cases { 
+ assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{}\"", expected)); - let deserialized: ReaderFeatures = serde_json::from_str(&serialized).unwrap(); + let deserialized: ReaderFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: ReaderFeatures = expected.parse().unwrap(); + let from_str: ReaderFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } @@ -175,37 +237,37 @@ mod tests { #[test] fn test_roundtrip_writer_features() { let cases = [ - (WriterFeatures::AppendOnly, "appendOnly"), - (WriterFeatures::Invariants, "invariants"), - (WriterFeatures::CheckConstraints, "checkConstraints"), - (WriterFeatures::ChangeDataFeed, "changeDataFeed"), - (WriterFeatures::GeneratedColumns, "generatedColumns"), - (WriterFeatures::ColumnMapping, "columnMapping"), - (WriterFeatures::IdentityColumns, "identityColumns"), - (WriterFeatures::DeletionVectors, "deletionVectors"), - (WriterFeatures::RowTracking, "rowTracking"), - (WriterFeatures::TimestampWithoutTimezone, "timestampNtz"), - (WriterFeatures::TypeWidening, "typeWidening"), - (WriterFeatures::TypeWideningPreview, "typeWidening-preview"), - (WriterFeatures::DomainMetadata, "domainMetadata"), - (WriterFeatures::V2Checkpoint, "v2Checkpoint"), - (WriterFeatures::IcebergCompatV1, "icebergCompatV1"), - (WriterFeatures::IcebergCompatV2, "icebergCompatV2"), - (WriterFeatures::VacuumProtocolCheck, "vacuumProtocolCheck"), + (WriterFeature::AppendOnly, "appendOnly"), + (WriterFeature::Invariants, "invariants"), + (WriterFeature::CheckConstraints, "checkConstraints"), + (WriterFeature::ChangeDataFeed, "changeDataFeed"), + (WriterFeature::GeneratedColumns, "generatedColumns"), + (WriterFeature::ColumnMapping, "columnMapping"), + (WriterFeature::IdentityColumns, "identityColumns"), + (WriterFeature::DeletionVectors, "deletionVectors"), + (WriterFeature::RowTracking, "rowTracking"), + (WriterFeature::TimestampWithoutTimezone, "timestampNtz"), + (WriterFeature::TypeWidening, "typeWidening"), + (WriterFeature::TypeWideningPreview, "typeWidening-preview"), + (WriterFeature::DomainMetadata, "domainMetadata"), + (WriterFeature::V2Checkpoint, "v2Checkpoint"), + (WriterFeature::IcebergCompatV1, "icebergCompatV1"), + (WriterFeature::IcebergCompatV2, "icebergCompatV2"), + (WriterFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (WriterFeature::unknown("something"), "something"), ]; - assert_eq!(WriterFeatures::VARIANTS.len(), cases.len()); - - for ((feature, expected), name) in cases.into_iter().zip(WriterFeatures::VARIANTS) { - assert_eq!(*name, expected); + assert_eq!(WriterFeature::COUNT, cases.len()); + for (feature, expected) in cases { + assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{}\"", expected)); - let deserialized: WriterFeatures = serde_json::from_str(&serialized).unwrap(); + let deserialized: WriterFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: WriterFeatures = expected.parse().unwrap(); + let from_str: WriterFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index d74c2456a..2ed73b142 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -78,7 +78,9 @@ impl Transaction { let read_snapshot = snapshot.into(); // important! 
before a read/write to the table we must check it is supported - read_snapshot.protocol().ensure_write_supported()?; + read_snapshot + .table_configuration() + .ensure_write_supported()?; Ok(Transaction { read_snapshot, @@ -110,7 +112,7 @@ impl Transaction { ParsedLogPath::new_commit(self.read_snapshot.table_root(), commit_version)?; // step three: commit the actions as a json file in the log - let json_handler = engine.get_json_handler(); + let json_handler = engine.json_handler(); match json_handler.write_json_file(&commit_path.location, Box::new(actions), false) { Ok(()) => Ok(CommitResult::Committed(commit_version)), Err(Error::FileAlreadyExists(_)) => Ok(CommitResult::Conflict(self, commit_version)), @@ -149,8 +151,9 @@ impl Transaction { // for now, we just pass through all the columns except partition columns. // note this is _incorrect_ if table config deems we need partition columns. let partition_columns = &self.read_snapshot.metadata().partition_columns; - let fields = self.read_snapshot.schema().fields(); - let fields = fields + let schema = self.read_snapshot.schema(); + let fields = schema + .fields() .filter(|f| !partition_columns.contains(f.name())) .map(|f| Expression::column([f.name()])); Expression::struct_from(fields) @@ -165,11 +168,7 @@ impl Transaction { let target_dir = self.read_snapshot.table_root(); let snapshot_schema = self.read_snapshot.schema(); let logical_to_physical = self.generate_logical_to_physical(); - WriteContext::new( - target_dir.clone(), - Arc::new(snapshot_schema.clone()), - logical_to_physical, - ) + WriteContext::new(target_dir.clone(), snapshot_schema, logical_to_physical) } /// Add write metadata about files to include in the transaction. This API can be called @@ -187,7 +186,7 @@ fn generate_adds<'a>( engine: &dyn Engine, write_metadata: impl Iterator + Send + 'a, ) -> impl Iterator>> + Send + 'a { - let expression_handler = engine.get_expression_handler(); + let evaluation_handler = engine.evaluation_handler(); let write_metadata_schema = get_write_metadata_schema(); let log_schema = get_log_add_schema(); @@ -197,7 +196,7 @@ fn generate_adds<'a>( .fields() .map(|f| Expression::column([f.name()])), )]); - let adds_evaluator = expression_handler.get_evaluator( + let adds_evaluator = evaluation_handler.new_expression_evaluator( write_metadata_schema.clone(), adds_expr, log_schema.clone().into(), @@ -321,7 +320,7 @@ fn generate_commit_info( .shift_remove("inCommitTimestamp"); commit_info_field.data_type = DataType::Struct(commit_info_data_type); - let commit_info_evaluator = engine.get_expression_handler().get_evaluator( + let commit_info_evaluator = engine.evaluation_handler().new_expression_evaluator( engine_commit_info_schema.into(), commit_info_expr, commit_info_empty_struct_schema.into(), @@ -335,52 +334,51 @@ mod tests { use super::*; use crate::engine::arrow_data::ArrowEngineData; - use crate::engine::arrow_expression::ArrowExpressionHandler; + use crate::engine::arrow_expression::ArrowEvaluationHandler; use crate::schema::MapType; - use crate::{ExpressionHandler, FileSystemClient, JsonHandler, ParquetHandler}; + use crate::{EvaluationHandler, JsonHandler, ParquetHandler, StorageHandler}; - use arrow::json::writer::LineDelimitedWriter; - use arrow::record_batch::RecordBatch; - use arrow_array::builder::StringBuilder; - use arrow_schema::Schema as ArrowSchema; - use arrow_schema::{DataType as ArrowDataType, Field}; + use crate::arrow::array::{MapArray, MapBuilder, MapFieldNames, StringArray, StringBuilder}; + use 
crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use crate::arrow::error::ArrowError; + use crate::arrow::json::writer::LineDelimitedWriter; + use crate::arrow::record_batch::RecordBatch; - struct ExprEngine(Arc); + struct ExprEngine(Arc); impl ExprEngine { fn new() -> Self { - ExprEngine(Arc::new(ArrowExpressionHandler)) + ExprEngine(Arc::new(ArrowEvaluationHandler)) } } impl Engine for ExprEngine { - fn get_expression_handler(&self) -> Arc { + fn evaluation_handler(&self) -> Arc { self.0.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { unimplemented!() } - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { unimplemented!() } - fn get_file_system_client(&self) -> Arc { + fn storage_handler(&self) -> Arc { unimplemented!() } } - fn build_map(entries: Vec<(&str, &str)>) -> arrow_array::MapArray { + fn build_map(entries: Vec<(&str, &str)>) -> MapArray { let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); for (key, val) in entries { builder.keys().append_value(key); builder.values().append_value(val); @@ -494,7 +492,7 @@ mod tests { engine_commit_info_schema, vec![ Arc::new(map_array), - Arc::new(arrow_array::StringArray::from(vec!["some_string"])), + Arc::new(StringArray::from(vec!["some_string"])), ], )?; @@ -533,7 +531,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -542,12 +540,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) => (), + Error::Arrow(ArrowError::SchemaError(_)) => (), Error::Backtraced { source, .. } - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::SchemaError(_))) => {} _ => panic!("expected arrow schema error error, got {:?}", e), }); @@ -564,7 +559,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -573,12 +568,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) => (), + Error::Arrow(ArrowError::InvalidArgumentError(_)) => (), Error::Backtraced { source, .. 
} - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::InvalidArgumentError(_))) => {} _ => panic!("expected arrow invalid arg error, got {:?}", e), }); @@ -644,16 +636,16 @@ mod tests { ), true, )])); - use arrow_array::builder::StringBuilder; + let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = crate::arrow::array::MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + crate::arrow::array::MapBuilder::new(Some(names), key_builder, val_builder); builder.append(is_null).unwrap(); let array = builder.finish(); diff --git a/kernel/src/utils.rs b/kernel/src/utils.rs index 8f4fcf818..27e5bc3cf 100644 --- a/kernel/src/utils.rs +++ b/kernel/src/utils.rs @@ -13,6 +13,12 @@ pub(crate) use require; #[cfg(test)] pub(crate) mod test_utils { + use crate::actions::get_log_schema; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use crate::engine::sync::SyncEngine; + use crate::Engine; + use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::ObjectStore; @@ -21,7 +27,11 @@ pub(crate) mod test_utils { use tempfile::TempDir; use test_utils::delta_path_for_version; - use crate::actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}; + use crate::{ + actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}, + engine::arrow_data::ArrowEngineData, + EngineData, + }; #[derive(Serialize)] pub(crate) enum Action { @@ -73,9 +83,55 @@ pub(crate) mod test_utils { .await .expect("put log file in store"); } + /// Get the path to the root of the table. pub(crate) fn table_root(&self) -> &Path { self.dir.path() } } + + /// Try to convert an `EngineData` into a `RecordBatch`. 
Panics if not using `ArrowEngineData` from + /// the default module + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } + + /// Checks that two `EngineData` objects are equal by converting them to `RecordBatch` and comparing + pub(crate) fn assert_batch_matches(actual: Box, expected: Box) { + assert_eq!(into_record_batch(actual), into_record_batch(expected)); + } + + pub(crate) fn string_array_to_engine_data(string_array: StringArray) -> Box { + let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); + let schema = Arc::new(ArrowSchema::new(vec![string_field])); + let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) + .expect("Can't convert to record batch"); + Box::new(ArrowEngineData::new(batch)) + } + + pub(crate) fn parse_json_batch(json_strings: StringArray) -> Box { + let engine = SyncEngine::new(); + let json_handler = engine.json_handler(); + let output_schema = get_log_schema().clone(); + json_handler + .parse_json(string_array_to_engine_data(json_strings), output_schema) + .unwrap() + } + + pub(crate) fn action_batch() -> Box { + let json_strings: StringArray = vec![ + r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, + r#"{"remove":{"path":"part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"#, + r#"{"commitInfo":{"timestamp":1677811178585,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"635"},"engineInfo":"Databricks-Runtime/","txnId":"a6a94671-55ef-450e-9546-b8465b9147de"}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none", "delta.enableChangeDataFeed":"true"},"createdTime":1677811175819}}"#, + r#"{"cdc":{"path":"_change_data/age=21/cdc-00000-93f7fceb-281a-446a-b221-07b88132d203.c000.snappy.parquet","partitionValues":{"age":"21"},"size":1033,"dataChange":false}}"#, + r#"{"sidecar":{"path":"016ae953-37a9-438e-8683-9a9a4a79a395.parquet","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{"tag_foo":"tag_bar"}}}"#, + r#"{"txn":{"appId":"myApp","version": 3}}"#, + ] + .into(); + parse_json_batch(json_strings) + } } diff --git a/kernel/tests/cdf.rs b/kernel/tests/cdf.rs index 2560dc71d..069018951 100644 --- a/kernel/tests/cdf.rs +++ b/kernel/tests/cdf.rs @@ -1,7 +1,7 @@ use std::{error, sync::Arc}; -use arrow::compute::filter_record_batch; -use arrow_array::RecordBatch; +use delta_kernel::arrow::array::RecordBatch; +use 
delta_kernel::arrow::compute::filter_record_batch; use delta_kernel::engine::sync::SyncEngine; use itertools::Itertools; diff --git a/kernel/tests/common/mod.rs b/kernel/tests/common/mod.rs index a918695b7..4268f0626 100644 --- a/kernel/tests/common/mod.rs +++ b/kernel/tests/common/mod.rs @@ -1,6 +1,6 @@ -use arrow::compute::filter_record_batch; -use arrow::record_batch::RecordBatch; -use arrow::util::pretty::pretty_format_batches; +use delta_kernel::arrow::compute::filter_record_batch; +use delta_kernel::arrow::record_batch::RecordBatch; +use delta_kernel::arrow::util::pretty::pretty_format_batches; use itertools::Itertools; use crate::ArrowEngineData; @@ -24,7 +24,7 @@ macro_rules! sort_lines { #[macro_export] macro_rules! assert_batches_sorted_eq { ($expected_lines_sorted: expr, $CHUNKS: expr) => { - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) + let formatted = delta_kernel::arrow::util::pretty::pretty_format_batches($CHUNKS) .unwrap() .to_string(); // fix for windows: \r\n --> diff --git a/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst new file mode 100644 index 000000000..dbb8aa627 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst new file mode 100644 index 000000000..a31194638 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst new file mode 100644 index 000000000..aaba3d3f8 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst new file mode 100644 index 000000000..4f6833a5a Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst new file mode 100644 index 000000000..0f2a289b1 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst new file mode 100644 index 000000000..b90fc04b4 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-classic-checkpoint-json.tar.zst b/kernel/tests/data/v2-classic-checkpoint-json.tar.zst new file mode 100644 index 000000000..c695339cd Binary files /dev/null and b/kernel/tests/data/v2-classic-checkpoint-json.tar.zst differ diff --git a/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst b/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst new file mode 100644 index 000000000..87bca6f59 Binary files /dev/null and b/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst differ diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs index 120271ef2..241279906 100644 --- a/kernel/tests/golden_tables.rs +++ b/kernel/tests/golden_tables.rs @@ -3,23 +3,23 @@ //! 
Data (golden tables) are stored in tests/golden_data/.tar.zst //! Each table directory has a table/ and expected/ subdirectory with the input/output respectively -use arrow::array::AsArray; -use arrow::{compute::filter_record_batch, record_batch::RecordBatch}; -use arrow_ord::sort::{lexsort_to_indices, SortColumn}; -use arrow_schema::{FieldRef, Schema}; -use arrow_select::{concat::concat_batches, take::take}; +use delta_kernel::arrow::array::{Array, AsArray, StructArray}; +use delta_kernel::arrow::compute::{concat_batches, take}; +use delta_kernel::arrow::compute::{lexsort_to_indices, SortColumn}; +use delta_kernel::arrow::datatypes::{DataType, FieldRef, Schema}; +use delta_kernel::arrow::{compute::filter_record_batch, record_batch::RecordBatch}; use itertools::Itertools; use paste::paste; use std::path::{Path, PathBuf}; use std::sync::Arc; +use delta_kernel::parquet::arrow::async_reader::{ + ParquetObjectReader, ParquetRecordBatchStreamBuilder, +}; use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Table}; use futures::{stream::TryStreamExt, StreamExt}; use object_store::{local::LocalFileSystem, ObjectStore}; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; -use arrow_array::{Array, StructArray}; -use arrow_schema::DataType; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; @@ -273,8 +273,8 @@ async fn canonicalized_paths_test( .into_scan_builder() .build() .expect("build the scan"); - let mut scan_data = scan.scan_data(&engine).expect("scan data"); - assert!(scan_data.next().is_none()); + let mut scan_metadata = scan.scan_metadata(&engine).expect("scan metadata"); + assert!(scan_metadata.next().is_none()); Ok(()) } @@ -289,9 +289,12 @@ async fn checkpoint_test( .into_scan_builder() .build() .expect("build the scan"); - let scan_data: Vec<_> = scan.scan_data(&engine).expect("scan data").collect(); + let scan_metadata: Vec<_> = scan + .scan_metadata(&engine) + .expect("scan metadata") + .collect(); assert_eq!(version, 14); - assert!(scan_data.len() == 1); + assert!(scan_metadata.len() == 1); Ok(()) } @@ -408,9 +411,8 @@ golden_test!("time-travel-schema-changes-b", latest_snapshot_test); golden_test!("time-travel-start", latest_snapshot_test); golden_test!("time-travel-start-start20", latest_snapshot_test); golden_test!("time-travel-start-start20-start40", latest_snapshot_test); - -skip_test!("v2-checkpoint-json": "v2 checkpoint not supported"); -skip_test!("v2-checkpoint-parquet": "v2 checkpoint not supported"); +golden_test!("v2-checkpoint-json", latest_snapshot_test); +golden_test!("v2-checkpoint-parquet", latest_snapshot_test); // BUG: // - AddFile: 'file:/some/unqualified/absolute/path' diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 9d5d24314..2247240ff 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -3,22 +3,23 @@ use std::ops::Not; use std::path::PathBuf; use std::sync::Arc; -use arrow::compute::filter_record_batch; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use arrow_select::concat::concat_batches; use delta_kernel::actions::deletion_vector::split_vector; +use delta_kernel::arrow::compute::{concat_batches, filter_record_batch}; +use delta_kernel::arrow::datatypes::SchemaRef as ArrowSchemaRef; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; use 
delta_kernel::expressions::{column_expr, BinaryOperator, Expression, ExpressionRef}; -use delta_kernel::scan::state::{transform_to_logical, visit_scan_files, DvInfo, Stats}; +use delta_kernel::parquet::file::properties::{EnabledStatistics, WriterProperties}; +use delta_kernel::scan::state::{transform_to_logical, DvInfo, Stats}; use delta_kernel::scan::Scan; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::{Engine, FileMeta, Table}; +use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use test_utils::{ actions_to_string, add_commit, generate_batch, generate_simple_batch, into_record_batch, - record_batch_to_bytes, IntoArray, TestAction, METADATA, + record_batch_to_bytes, record_batch_to_bytes_with_props, IntoArray, TestAction, METADATA, }; use url::Url; @@ -58,7 +59,6 @@ async fn single_commit_two_add_files() -> Result<(), Box> let location = Url::parse("memory:///")?; let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), )); @@ -113,11 +113,7 @@ async fn two_commits() -> Result<(), Box> { .await?; let location = Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch.clone(), batch]; @@ -171,11 +167,7 @@ async fn remove_action() -> Result<(), Box> { .await?; let location = Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch]; @@ -249,7 +241,6 @@ async fn stats() -> Result<(), Box> { let location = Url::parse("memory:///").unwrap(); let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from(""), Arc::new(TokioBackgroundExecutor::new()), )); @@ -342,7 +333,7 @@ struct ScanFile { transform: Option, } -fn scan_data_callback( +fn scan_metadata_callback( batches: &mut Vec, path: &str, size: i64, @@ -359,7 +350,7 @@ fn scan_data_callback( }); } -fn read_with_scan_data( +fn read_with_scan_metadata( location: &Url, engine: &dyn Engine, scan: &Scan, @@ -367,17 +358,11 @@ fn read_with_scan_data( ) -> Result<(), Box> { let global_state = scan.global_scan_state(); let result_schema: ArrowSchemaRef = Arc::new(scan.schema().as_ref().try_into()?); - let scan_data = scan.scan_data(engine)?; + let scan_metadata = scan.scan_metadata(engine)?; let mut scan_files = vec![]; - for data in scan_data { - let (data, vec, transforms) = data?; - scan_files = visit_scan_files( - data.as_ref(), - &vec, - &transforms, - scan_files, - scan_data_callback, - )?; + for res in scan_metadata { + let scan_metadata = res?; + scan_files = scan_metadata.visit_scan_files(scan_files, scan_metadata_callback)?; } let mut batches = vec![]; @@ -393,7 +378,7 @@ fn read_with_scan_data( location: file_path, }; let read_results = engine - .get_parquet_handler() + .parquet_handler() .read_parquet_files( &[meta], global_state.physical_schema.clone(), @@ -471,7 +456,7 @@ fn read_table_data( .build()?; sort_lines!(expected); - read_with_scan_data(table.location(), engine.as_ref(), &scan, &expected)?; + read_with_scan_metadata(table.location(), engine.as_ref(), &scan, &expected)?; 
read_with_execute(engine, &scan, &expected)?; } Ok(()) } @@ -576,6 +561,26 @@ fn table_for_numbers(nums: Vec) -> Vec { res } +// get the basic_partitioned table for a set of expected letters +fn table_for_letters(letters: &[char]) -> Vec<String> { + let mut res: Vec<String> = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + let rows = vec![(1, 'a'), (2, 'b'), (3, 'c'), (4, 'a'), (5, 'e')]; + for (num, letter) in rows { + if letters.contains(&letter) { + res.push(format!("| {letter} | {num} |")); + } + } + res.push("+--------+--------+".to_string()); + res +} + #[test] fn predicate_on_number() -> Result<(), Box> { let cases = vec![ @@ -613,6 +618,118 @@ fn predicate_on_number() -> Result<(), Box> { Ok(()) } +#[test] +fn predicate_on_letter() -> Result<(), Box<dyn std::error::Error>> { + // Test basic column pruning. Note that the actual expression machinery is already well-tested, + // so we're just testing wiring here. + let null_row_table: Vec<String> = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + (column_expr!("letter").is_null(), null_row_table), + ( + column_expr!("letter").is_not_null(), + table_for_letters(&['a', 'b', 'c', 'e']), + ), + ( + column_expr!("letter").lt("c"), + table_for_letters(&['a', 'b']), + ), + ( + column_expr!("letter").le("c"), + table_for_letters(&['a', 'b', 'c']), + ), + (column_expr!("letter").gt("c"), table_for_letters(&['e'])), + ( + column_expr!("letter").ge("c"), + table_for_letters(&['c', 'e']), + ), + (column_expr!("letter").eq("c"), table_for_letters(&['c'])), + ( + column_expr!("letter").ne("c"), + table_for_letters(&['a', 'b', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + +#[test] +fn predicate_on_letter_and_number() -> Result<(), Box<dyn std::error::Error>> { + // Partition skipping and file skipping are currently implemented separately. Mixing them in an + // AND clause will evaluate each separately, but mixing them in an OR clause disables both.
+ let full_table: Vec = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "| a | 1 |", + "| a | 4 |", + "| b | 2 |", + "| c | 3 |", + "| e | 5 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + ( + Expression::or( + // No pruning power + column_expr!("letter").gt("a"), + column_expr!("number").gt(3i64), + ), + full_table, + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + column_expr!("number").gt(3i64), // letters a, e + ), + table_for_letters(&['e']), + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + Expression::or( + // No pruning power + column_expr!("letter").eq("c"), + column_expr!("number").eq(3i64), + ), + ), + table_for_letters(&['b', 'c', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + #[test] fn predicate_on_number_not() -> Result<(), Box> { let cases = vec![ @@ -906,6 +1023,126 @@ fn with_predicate_and_removes() -> Result<(), Box> { Ok(()) } +#[tokio::test] +async fn predicate_on_non_nullable_partition_column() -> Result<(), Box> { + // Test for https://github.com/delta-io/delta-kernel-rs/issues/698 + let batch = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"id\"]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + format!(r#"{{"add":{{"path":"id=1/{PARQUET_FILE1}","partitionValues":{{"id":"1"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + format!(r#"{{"add":{{"path":"id=2/{PARQUET_FILE2}","partitionValues":{{"id":"2"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + ]; + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from("id=1").child(PARQUET_FILE1), + record_batch_to_bytes(&batch).into(), + ) + .await?; + storage + .put( + &Path::from("id=2").child(PARQUET_FILE2), + record_batch_to_bytes(&batch).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = Arc::new(DefaultEngine::new( + storage.clone(), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("id"), 2); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let mut result_batch = 
into_record_batch(engine_data?.raw_data?); + let _ = result_batch.remove_column(result_batch.schema().index_of("id")?); + assert_eq!(&batch, &result_batch); + files_scanned += 1; + } + assert_eq!(1, files_scanned); + Ok(()) +} + +#[tokio::test] +async fn predicate_on_non_nullable_column_missing_stats() -> Result<(), Box> +{ + let batch_1 = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + let batch_2 = generate_batch(vec![("val", vec!["d", "e", "f"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + // Add one file with stats, one file without + format!(r#"{{"add":{{"path":"{PARQUET_FILE1}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + format!(r#"{{"add":{{"path":"{PARQUET_FILE2}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{}},\"minValues\":{{}},\"maxValues\":{{}}}}"}}}}"#), + ]; + + // Disable writing Parquet statistics so these cannot be used for pruning row groups + let writer_props = WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::None) + .build(); + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from(PARQUET_FILE1), + record_batch_to_bytes_with_props(&batch_1, writer_props.clone()).into(), + ) + .await?; + storage + .put( + &Path::from(PARQUET_FILE2), + record_batch_to_bytes_with_props(&batch_2, writer_props).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = Arc::new(DefaultEngine::new( + storage.clone(), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("val"), "g"); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let result_batch = into_record_batch(engine_data?.raw_data?); + assert_eq!(&batch_2, &result_batch); + files_scanned += 1; + } + // One file is scanned as stats are missing so we don't know the predicate isn't satisfied + assert_eq!(1, files_scanned); + + Ok(()) +} + #[test] fn short_dv() -> Result<(), Box> { let expected = vec![ diff --git a/kernel/tests/v2_checkpoints.rs b/kernel/tests/v2_checkpoints.rs new file mode 100644 index 000000000..4384ed6e0 --- /dev/null +++ b/kernel/tests/v2_checkpoints.rs @@ -0,0 +1,224 @@ +use std::sync::Arc; + +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::engine::sync::SyncEngine; + +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::{DeltaResult, Table}; + +mod common; +use common::{load_test_data, read_scan}; +use itertools::Itertools; + +fn 
read_v2_checkpoint_table(test_name: impl AsRef) -> DeltaResult> { + let test_dir = load_test_data("tests/data", test_name.as_ref()).unwrap(); + let test_path = test_dir.path().join(test_name.as_ref()); + + let table = Table::try_from_uri(test_path.to_str().expect("table path to string")).unwrap(); + let engine = Arc::new(SyncEngine::new()); + let snapshot = table.snapshot(engine.as_ref(), None)?; + let scan = snapshot.into_scan_builder().build()?; + let batches = read_scan(&scan, engine)?; + + Ok(batches) +} + +fn test_v2_checkpoint_with_table( + table_name: &str, + mut expected_table: Vec, +) -> DeltaResult<()> { + let batches = read_v2_checkpoint_table(table_name)?; + + sort_lines!(expected_table); + assert_batches_sorted_eq!(expected_table, &batches); + Ok(()) +} + +/// Helper function to convert string slice vectors to String vectors +fn to_string_vec(string_slice_vec: Vec<&str>) -> Vec { + string_slice_vec + .into_iter() + .map(|s| s.to_string()) + .collect() +} + +fn generate_sidecar_expected_data() -> Vec { + let header = vec![ + "+-----+".to_string(), + "| id |".to_string(), + "+-----+".to_string(), + ]; + + // Generate rows for different ranges + let generate_rows = |count: usize| -> Vec { + (0..count) + .map(|id| format!("| {: Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_classic_checkpoint_table() -> Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 10 |", + "| 11 |", + "| 12 |", + "| 13 |", + "| 14 |", + "| 15 |", + "| 16 |", + "| 17 |", + "| 18 |", + "| 19 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_without_sidecars_table() -> Vec { + to_string_vec(vec![ + "+------+", + "| id |", + "+------+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 2718 |", + "+------+", + ]) +} + +/// The test cases below are derived from delta-spark's `CheckpointSuite`. +/// +/// These tests are converted from delta-spark using the following process: +/// 1. Specific test cases of interest in `delta-spark` were modified to persist their generated tables +/// 2. These tables were compressed into `.tar.zst` archives and copied to delta-kernel-rs +/// 3. 
Each test loads a stored table, scans it, and asserts that the returned table state +/// matches the expected state derived from the corresponding table insertions in `delta-spark` +/// +/// The following is the ported list of `delta-spark` tests -> `delta-kernel-rs` tests: +/// +/// - `multipart v2 checkpoint` -> `v2_checkpoints_json_with_sidecars` +/// - `multipart v2 checkpoint` -> `v2_checkpoints_parquet_with_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_json_without_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_parquet_without_sidecars` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_json` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_parquet` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_json_with_last_checkpoint` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_parquet_with_last_checkpoint` +#[test] +fn v2_checkpoints_json_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_json_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-without-sidecars", + get_without_sidecars_table(), + ) +} + +#[test] +fn v2_checkpoints_parquet_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-without-sidecars", + get_without_sidecars_table(), + ) +} + +#[test] +fn v2_classic_checkpoint_json() -> DeltaResult<()> { + test_v2_checkpoint_with_table("v2-classic-checkpoint-json", get_classic_checkpoint_table()) +} + +#[test] +fn v2_classic_checkpoint_parquet() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-classic-checkpoint-parquet", + get_classic_checkpoint_table(), + ) +} + +#[test] +fn v2_checkpoints_json_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-last-checkpoint", + get_simple_id_table(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-last-checkpoint", + get_simple_id_table(), + ) +} diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index 2ee6dfdd5..eb3671595 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -1,10 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow::array::{Int32Array, StringArray}; -use arrow::record_batch::RecordBatch; -use arrow_schema::Schema as ArrowSchema; -use arrow_schema::{DataType as ArrowDataType, Field}; +use delta_kernel::arrow::array::{ + Int32Array, MapBuilder, MapFieldNames, StringArray, StringBuilder, +}; +use delta_kernel::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; +use delta_kernel::arrow::error::ArrowError; +use delta_kernel::arrow::record_batch::RecordBatch; use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::memory::InMemory; @@ -46,7 +48,7 @@ fn setup( let table_root_path = Path::from(format!("{base_path}{table_name}")); let url = Url::parse(&format!("{base_url}{table_root_path}/")).unwrap(); let executor = Arc::new(TokioBackgroundExecutor::new()); - 
let engine = DefaultEngine::new(Arc::clone(&storage), table_root_path, executor); + let engine = DefaultEngine::new(Arc::clone(&storage), executor); (storage, engine, url) } @@ -58,18 +60,28 @@ async fn create_table( table_path: Url, schema: SchemaRef, partition_columns: &[&str], + use_37_protocol: bool, ) -> Result> { let table_id = "test_id"; let schema = serde_json::to_string(&schema)?; - let protocol = json!({ - "protocol": { - "minReaderVersion": 3, - "minWriterVersion": 7, - "readerFeatures": [], - "writerFeatures": [] - } - }); + let protocol = if use_37_protocol { + json!({ + "protocol": { + "minReaderVersion": 3, + "minWriterVersion": 7, + "readerFeatures": [], + "writerFeatures": [] + } + }) + } else { + json!({ + "protocol": { + "minReaderVersion": 1, + "minWriterVersion": 1, + } + }) + }; let metadata = json!({ "metaData": { "id": table_id, @@ -120,15 +132,14 @@ fn new_commit_info() -> DeltaResult> { false, )])); - use arrow_array::builder::StringBuilder; let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); builder.keys().append_value("engineInfo"); builder.values().append_value("default engine"); builder.append(true).unwrap(); @@ -139,56 +150,99 @@ fn new_commit_info() -> DeltaResult> { Ok(Box::new(ArrowEngineData::new(commit_info_batch))) } +async fn setup_tables( + schema: SchemaRef, + partition_columns: &[&str], +) -> Result< + Vec<( + Table, + DefaultEngine, + Arc, + &'static str, + )>, + Box, +> { + let (store_37, engine_37, table_location_37) = setup("test_table_37", true); + let (store_11, engine_11, table_location_11) = setup("test_table_11", true); + Ok(vec![ + ( + create_table( + store_37.clone(), + table_location_37, + schema.clone(), + partition_columns, + true, + ) + .await?, + engine_37, + store_37, + "test_table_37", + ), + ( + create_table( + store_11.clone(), + table_location_11, + schema, + partition_columns, + false, + ) + .await?, + engine_11, + store_11, + "test_table_11", + ), + ]) +} + #[tokio::test] async fn test_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - let commit_info = new_commit_info()?; - - // create a transaction - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); - - // commit! 
- txn.commit(&engine)?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; - *parsed_commit - .get_mut("commitInfo") - .unwrap() - .get_mut("timestamp") - .unwrap() = serde_json::Value::Number(0.into()); - - let expected_commit = json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + + for (table, engine, store, table_name) in setup_tables(schema, &[]).await? { + let commit_info = new_commit_info()?; + + // create a transaction + let txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // commit! + txn.commit(&engine)?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; + *parsed_commit + .get_mut("commitInfo") + .unwrap() + .get_mut("timestamp") + .unwrap() = serde_json::Value::Number(0.into()); + + let expected_commit = json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }); + }); - assert_eq!(parsed_commit, expected_commit); + assert_eq!(parsed_commit, expected_commit); + } Ok(()) } @@ -196,21 +250,18 @@ async fn test_commit_info() -> Result<(), Box> { async fn test_empty_commit() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - assert!(matches!( - table.new_transaction(&engine)?.commit(&engine).unwrap_err(), - KernelError::MissingCommitInfo - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + assert!(matches!( + table.new_transaction(&engine)?.commit(&engine).unwrap_err(), + KernelError::MissingCommitInfo + )); + } Ok(()) } @@ -218,53 +269,51 @@ async fn test_empty_commit() -> Result<(), Box> { async fn test_invalid_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - // empty commit info test - let commit_info_schema = Arc::new(ArrowSchema::empty()); - let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); - assert!(commit_info_batch.num_rows() == 0); - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! 
- assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); - - // two-row commit info test - let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( - "engineInfo", - ArrowDataType::Utf8, - true, - )])); - let commit_info_batch = RecordBatch::try_new( - commit_info_schema.clone(), - vec![Arc::new(StringArray::from(vec![ - "row1: default engine", - "row2: default engine", - ]))], - )?; - - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! - assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + // empty commit info test + let commit_info_schema = Arc::new(ArrowSchema::empty()); + let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); + assert!(commit_info_batch.num_rows() == 0); + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + + // two-row commit info test + let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "engineInfo", + ArrowDataType::Utf8, + true, + )])); + let commit_info_batch = RecordBatch::try_new( + commit_info_schema.clone(), + vec![Arc::new(StringArray::from(vec![ + "row1: default engine", + "row2: default engine", + ]))], + )?; + + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + } Ok(()) } @@ -329,125 +378,123 @@ async fn get_and_check_all_parquet_sizes(store: Arc, path: &str async fn test_append() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, store, table_name) in setup_tables(schema.clone(), &[]).await? { + let commit_info = new_commit_info()?; - let mut txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); + let mut txn = table + .new_transaction(&engine)? 
+ .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - // commit! - txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) - .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
+ .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(vec![ - 1, 2, 3, 4, 5, 6, - ]))], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": "second.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, expected_commit); + + test_read( + &ArrowEngineData::new(RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]))], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -455,8 +502,7 @@ async fn test_append() -> Result<(), Box> { async fn test_append_partitioned() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); + let partition_col = "partition"; // create a simple partitioned table: one int column named 'number', partitioned by string @@ -469,132 +515,131 @@ async fn test_append_partitioned() -> Result<(), Box> { "number", DataType::INTEGER, )])); - let table = create_table( - store.clone(), - table_location, - table_schema.clone(), - &[partition_col], - ) - .await?; - - let commit_info = new_commit_info()?; - - let mut txn = table - .new_transaction(&engine)? 
- .with_commit_info(commit_info); - - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); - let partition_vals = vec!["a", "b"]; - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data - .into_iter() - .zip(partition_vals) - .map(|(data, partition_val)| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::from([(partition_col.to_string(), partition_val.to_string())]), - true, - ) - .await - }) + for (table, engine, store, table_name) in + setup_tables(table_schema.clone(), &[partition_col]).await? + { + let commit_info = new_commit_info()?; + + let mut txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) }); + let partition_vals = vec!["a", "b"]; + + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data + .into_iter() + .zip(partition_vals) + .map(|(data, partition_val)| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::from([(partition_col.to_string(), partition_val.to_string())]), + true, + ) + .await + }) + }); + + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } - - // commit! - txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
- .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) + .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": { - "partition": "a" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": { - "partition": "b" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(table_schema.as_ref().try_into()?), - vec![ - Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), - Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), - ], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": { + "partition": "a" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": "second.parquet", + "partitionValues": { + "partition": "b" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, expected_commit); + + test_read( + 
&ArrowEngineData::new(RecordBatch::try_new( + Arc::new(table_schema.as_ref().try_into()?), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), + Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), + ], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -602,9 +647,6 @@ async fn test_append_partitioned() -> Result<(), Box> { async fn test_append_invalid_schema() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let table_schema = Arc::new(StructType::new(vec![StructField::nullable( "number", @@ -615,52 +657,50 @@ async fn test_append_invalid_schema() -> Result<(), Box> "string", DataType::STRING, )])); - let table = create_table(store.clone(), table_location, table_schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, _store, _table_name) in setup_tables(table_schema, &[]).await? { + let commit_info = new_commit_info()?; - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); + let txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::StringArray::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(StringArray::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - assert!(write_metadata.all(|res| match res { - Err(KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_))) => true, - Err(KernelError::Backtraced { source, .. }) - if matches!( - &*source, - KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => - true, - _ => false, - })); + let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + assert!(write_metadata.all(|res| match res { + Err(KernelError::Arrow(ArrowError::SchemaError(_))) => true, + Err(KernelError::Backtraced { source, .. 
diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml
index 0a90e96ed..20df4a524 100644
--- a/test-utils/Cargo.toml
+++ b/test-utils/Cargo.toml
@@ -12,9 +12,6 @@ version.workspace = true
 release = false
 
 [dependencies]
-arrow-array = { workspace = true, features = ["chrono-tz"] }
-arrow-schema = { workspace = true }
-delta_kernel = { path = "../kernel", features = [ "default-engine" ] }
+delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] }
 itertools = "0.13.0"
 object_store = { workspace = true }
-parquet = { workspace = true }
diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs
index 2605bea56..a8b7c6610 100644
--- a/test-utils/src/lib.rs
+++ b/test-utils/src/lib.rs
@@ -2,44 +2,68 @@
 
 use std::sync::Arc;
 
-use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray};
-use arrow_schema::ArrowError;
+use delta_kernel::arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
+use delta_kernel::arrow::error::ArrowError;
 use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::parquet::arrow::arrow_writer::ArrowWriter;
+use delta_kernel::parquet::file::properties::WriterProperties;
 use delta_kernel::EngineData;
 use itertools::Itertools;
 use object_store::{path::Path, ObjectStore};
-use parquet::arrow::arrow_writer::ArrowWriter;
-use parquet::file::properties::WriterProperties;
 
 /// A common useful initial metadata and protocol. Also includes a single commitInfo
 pub const METADATA: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}
 {"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
 {"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#;
 
+/// Same as [`METADATA`], but the table is partitioned by the `val` column. Also includes a single commitInfo
+pub const METADATA_WITH_PARTITION_COLS: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}
+{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
+{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["val"],"configuration":{},"createdTime":1587968585495}}"#;
+
 pub enum TestAction {
     Add(String),
     Remove(String),
     Metadata,
 }
 
-/// Convert a vector of actions into a newline delimited json string
+// TODO: We need a better way to mock tables :)
+
+/// Convert a vector of actions into a newline delimited json string, with standard metadata
 pub fn actions_to_string(actions: Vec<TestAction>) -> String {
+    actions_to_string_with_metadata(actions, METADATA)
+}
+
+/// Convert a vector of actions into a newline delimited json string, with metadata including a partition column
+pub fn actions_to_string_partitioned(actions: Vec<TestAction>) -> String {
+    actions_to_string_with_metadata(actions, METADATA_WITH_PARTITION_COLS)
+}
+
+fn actions_to_string_with_metadata(actions: Vec<TestAction>, metadata: &str) -> String {
     actions
-        .into_iter()
-        .map(|test_action| match test_action {
-            TestAction::Add(path) => format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#),
-            TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#),
-            TestAction::Metadata => METADATA.into(),
-        })
-        .join("\n")
+        .into_iter()
+        .map(|test_action| match test_action {
+            TestAction::Add(path) => format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#),
+            TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#),
+            TestAction::Metadata => metadata.into(),
+        })
+        .join("\n")
 }
 
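// [Editor's note] Illustrative only, not part of this patch: typical usage of the helpers
// above when mocking a table. The function name and parquet file name are hypothetical.
fn example_first_commit() -> String {
    // protocol + metadata + a single data file, as newline-delimited JSON; the result
    // would be written as `_delta_log/00000000000000000000.json` of the mocked table
    // (use `actions_to_string_partitioned` for the partitioned variant)
    actions_to_string(vec![
        TestAction::Metadata,
        TestAction::Add("part-00000.parquet".to_string()),
    ])
}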
 /// convert a RecordBatch into a vector of bytes. We can't use `From` since these are both foreign
 /// types
 pub fn record_batch_to_bytes(batch: &RecordBatch) -> Vec<u8> {
-    let mut data: Vec<u8> = Vec::new();
     let props = WriterProperties::builder().build();
-    let mut writer = ArrowWriter::try_new(&mut data, batch.schema(), Some(props)).unwrap();
+    record_batch_to_bytes_with_props(batch, props)
+}
+
+pub fn record_batch_to_bytes_with_props(
+    batch: &RecordBatch,
+    writer_properties: WriterProperties,
+) -> Vec<u8> {
+    let mut data: Vec<u8> = Vec::new();
+    let mut writer =
+        ArrowWriter::try_new(&mut data, batch.schema(), Some(writer_properties)).unwrap();
     writer.write(batch).expect("Writing batch");
     // writer must be closed to write footer
     writer.close().unwrap();