diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..ddff4407b --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-C", "target-cpu=native"] diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8a24dd07..36cf67320 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,27 +10,20 @@ jobs: format: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with rustfmt + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + components: rustfmt - name: format run: cargo fmt -- --check + msrv: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-msrv shell: bash run: | @@ -46,11 +39,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: Swatinem/rust-cache@v2 - name: Install cargo-msrv shell: bash @@ -74,16 +63,34 @@ jobs: env: RUSTDOCFLAGS: -D warnings steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: build docs run: cargo doc + + + # When we run cargo { build, clippy } --no-default-features, we want to build/lint the kernel to + # ensure that we can build the kernel without any features enabled. Unfortunately, due to how + # cargo resolves features, if we have a workspace member that depends on the kernel with features + # enabled, the kernel will be compiled with those features (even if we specify + # --no-default-features). + # + # To cope with this, we split build/clippy --no-default-features into two runs: + # 1. build/clippy all packages that depend on the kernel with some features enabled: + # - acceptance + # - test_utils + # - feature_tests + # (and examples) + # - inspect-table + # - read-table-changes + # - read-table-multi-threaded + # - read-table-single-threaded + # 2. 
build/clippy all packages that only have no-feature kernel dependency + # - delta_kernel + # - delta_kernel_derive + # - delta_kernel_ffi + # - delta_kernel_ffi_macros build: runs-on: ${{ matrix.os }} strategy: @@ -93,20 +100,17 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with clippy + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 - - name: check kernel builds with no-default-features - run: cargo build -p delta_kernel --no-default-features + components: clippy - name: build and lint with clippy run: cargo clippy --benches --tests --all-features -- -D warnings - - name: lint without default features - run: cargo clippy --no-default-features -- -D warnings + - name: lint without default features - packages which depend on kernel with features enabled + run: cargo clippy --workspace --no-default-features --exclude delta_kernel --exclude delta_kernel_ffi --exclude delta_kernel_derive --exclude delta_kernel_ffi_macros -- -D warnings + - name: lint without default features - packages which don't depend on kernel with features enabled + run: cargo clippy --no-default-features --package delta_kernel --package delta_kernel_ffi --package delta_kernel_derive --package delta_kernel_ffi_macros -- -D warnings - name: check kernel builds with default-engine run: cargo build -p feature_tests --features default-engine - name: check kernel builds with default-engine-rustls @@ -120,14 +124,9 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: test run: cargo test --workspace --verbose --all-features -- --skip read_table_version_hdfs @@ -220,14 +219,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install rust - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - - uses: Swatinem/rust-cache@v2 - name: Generate code coverage run: cargo llvm-cov --all-features --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs - name: Upload coverage to Codecov diff --git a/.github/workflows/run_integration_test.yml b/.github/workflows/run_integration_test.yml index 1ff681cf6..73ffd599c 100644 --- a/.github/workflows/run_integration_test.yml +++ b/.github/workflows/run_integration_test.yml @@ -18,17 +18,11 @@ jobs: - name: Skip job for pull requests on Windows if: ${{ matrix.skip }} run: echo "Skipping job for pull requests on Windows." 
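To illustrate the Cargo feature-unification problem described in the `build.yml` comment above (the reason the `--no-default-features` lint is split into two clippy runs), here is a minimal, hypothetical sketch. Only the `default-engine` feature name and the crate names are taken from this PR; the module and function names are made up for illustration.

```rust
// lib.rs of a crate that declares a `default-engine` cargo feature (hypothetical layout).
//
// If any workspace member depends on delta_kernel with `features = ["default-engine"]`,
// a workspace-wide `cargo clippy --workspace --no-default-features` still compiles and
// lints delta_kernel with that feature enabled, because Cargo unifies features across
// the whole invocation. Only the second CI run, which targets the kernel crates directly
// (`cargo clippy --no-default-features --package delta_kernel ...`), checks that the
// crate builds with the cfg below turned off.

#[cfg(feature = "default-engine")]
pub mod default_engine {
    // Always linted by the workspace-wide run as long as some sibling crate
    // turns the feature on.
    pub fn engine_only_entry_point() {}
}

#[cfg(not(feature = "default-engine"))]
pub fn no_feature_fallback() {
    // This path is only exercised by the kernel-only, no-default-features run.
}
```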
- - uses: actions/checkout@v3 + - uses: actions/checkout@v4 if: ${{ !matrix.skip }} - - name: Install minimal stable rust - if: ${{ !matrix.skip }} - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + - name: Setup rust toolchain if: ${{ !matrix.skip }} + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Run integration tests if: ${{ !matrix.skip }} shell: bash diff --git a/.github/workflows/semver-checks.yml b/.github/workflows/semver-checks.yml index 3f0374452..7bd39b167 100644 --- a/.github/workflows/semver-checks.yml +++ b/.github/workflows/semver-checks.yml @@ -25,12 +25,7 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-semver-checks shell: bash run: | diff --git a/.gitignore b/.gitignore index ba9bf6241..b057be7f8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ .zed # Rust -.cargo/ target/ integration-tests/Cargo.lock diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d403a1fd..bf2228ebc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,239 @@ # Changelog +## [v0.9.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.9.0/) (2025-04-08) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.8.0...v0.9.0) + +### ๐Ÿ—๏ธ Breaking changes +1. Change `MetadataValue::Number(i32)` to `MetadataValue::Number(i64)` ([#733]) +2. Get prefix from offset path: `DefaultEngine::new` no longer requires a `table_root` parameter + and `list_from` consistently returns keys greater than the offset ([#699]) +3. Make `snapshot.schema()` return a `SchemaRef` ([#751]) +4. Make `visit_expression_internal` private, and `unwrap_kernel_expression` pub(crate) ([#767]) +5. Make actions types `pub(crate)` instead of `pub` ([#405]) +6. New `null_row` ExpressionHandler API ([#662]) +7. Rename enums `ReaderFeatures` -> `ReaderFeature` and `WriterFeatures` -> `WriterFeature` ([#802]) +8. Remove `get_` prefix from engine getters ([#804]) +9. Rename `FileSystemClient` to `StorageHandler` ([#805]) +10. Adopt types for table features (new `ReaderFeature::Unknown(String)` and + `WriterFeature::Unknown(String)`) ([#684]) +11. Renamed `ScanData` to `ScanMetadata` ([#817]) + - rename `ScanData` to `ScanMetadata` + - rename `Scan::scan_data()` to `Scan::scan_metadata()` + - (ffi) rename `free_kernel_scan_data()` to `free_scan_metadata_iter()` + - (ffi) rename `kernel_scan_data_next()` to `scan_metadata_next()` + - (ffi) rename `visit_scan_data()` to `visit_scan_metadata()` + - (ffi) rename `kernel_scan_data_init()` to `scan_metadata_iter_init()` + - (ffi) rename `KernelScanDataIterator` to `ScanMetadataIterator` + - (ffi) rename `SharedScanDataIterator` to `SharedScanMetadataIterator` +12. `ScanMetadata` is now a struct (instead of tuple) with new `FilteredEngineData` type ([#768]) + +### ๐Ÿš€ Features / new APIs + +1. (`v2Checkpoint`) Extract & insert sidecar batches in `replay`'s action iterator ([#679]) +2. Support the `v2Checkpoint` reader/writer feature ([#685]) +3. Add check for whether `appendOnly` table feature is supported or enabled ([#664]) +4. Add basic partition pruning support ([#713]) +5. Add `DeletionVectors` to supported writer features ([#735]) +6. Add writer version 2/invariant table feature support ([#734]) +7.
Improved pre-signed URL checks ([#760]) +8. Add `CheckpointMetadata` action ([#781]) +9. Add classic and uuid parquet checkpoint path generation ([#782]) +10. New `Snapshot::try_new_from()` API ([#549]) + +### ๐Ÿ› Bug Fixes + +1. Return `Error::unsupported` instead of panic in `Scalar::to_array(MapType)` ([#757]) +2. Remove 'default-members' in workspace, default to all crates ([#752]) +3. Update compilation error and clippy lints for rustc 1.86 ([#800]) + +### ๐Ÿšœ Refactor + +1. Split up `arrow_expression` module ([#750]) +2. Flatten deeply nested match statement ([#756]) +3. Simplify predicate evaluation by supporting inversion ([#761]) +4. Rename `LogSegment::replay` to `LogSegment::read_actions` ([#766]) +5. Extract deduplication logic from `AddRemoveDedupVisitor` into embeddable `FileActionsDeduplicator` ([#769]) +6. Move testing helper function to `test_utils` mod ([#794]) +7. Rename `_last_checkpoint` from `CheckpointMetadata` to `LastCheckpointHint` ([#789]) +8. Use ExpressionTransform instead of adhoc expression traversals ([#803]) +9. Extract log replay processing structure into `LogReplayProcessor` trait ([#774]) + +### ๐Ÿงช Testing + +1. Add V2 checkpoint read support integration tests ([#690]) + +### โš™๏ธ Chores/CI + +1. Use maintained action to setup rust toolchain ([#585]) + +### Other + +1. Update HDFS dependencies ([#689]) +2. Add .cargo/config.toml with native instruction codegen ([#772]) + + +[#679]: https://github.com/delta-io/delta-kernel-rs/pull/679 +[#685]: https://github.com/delta-io/delta-kernel-rs/pull/685 +[#689]: https://github.com/delta-io/delta-kernel-rs/pull/689 +[#664]: https://github.com/delta-io/delta-kernel-rs/pull/664 +[#690]: https://github.com/delta-io/delta-kernel-rs/pull/690 +[#713]: https://github.com/delta-io/delta-kernel-rs/pull/713 +[#735]: https://github.com/delta-io/delta-kernel-rs/pull/735 +[#734]: https://github.com/delta-io/delta-kernel-rs/pull/734 +[#733]: https://github.com/delta-io/delta-kernel-rs/pull/733 +[#585]: https://github.com/delta-io/delta-kernel-rs/pull/585 +[#750]: https://github.com/delta-io/delta-kernel-rs/pull/750 +[#756]: https://github.com/delta-io/delta-kernel-rs/pull/756 +[#757]: https://github.com/delta-io/delta-kernel-rs/pull/757 +[#699]: https://github.com/delta-io/delta-kernel-rs/pull/699 +[#752]: https://github.com/delta-io/delta-kernel-rs/pull/752 +[#751]: https://github.com/delta-io/delta-kernel-rs/pull/751 +[#761]: https://github.com/delta-io/delta-kernel-rs/pull/761 +[#760]: https://github.com/delta-io/delta-kernel-rs/pull/760 +[#766]: https://github.com/delta-io/delta-kernel-rs/pull/766 +[#767]: https://github.com/delta-io/delta-kernel-rs/pull/767 +[#405]: https://github.com/delta-io/delta-kernel-rs/pull/405 +[#772]: https://github.com/delta-io/delta-kernel-rs/pull/772 +[#662]: https://github.com/delta-io/delta-kernel-rs/pull/662 +[#769]: https://github.com/delta-io/delta-kernel-rs/pull/769 +[#794]: https://github.com/delta-io/delta-kernel-rs/pull/794 +[#781]: https://github.com/delta-io/delta-kernel-rs/pull/781 +[#789]: https://github.com/delta-io/delta-kernel-rs/pull/789 +[#800]: https://github.com/delta-io/delta-kernel-rs/pull/800 +[#802]: https://github.com/delta-io/delta-kernel-rs/pull/802 +[#803]: https://github.com/delta-io/delta-kernel-rs/pull/803 +[#774]: https://github.com/delta-io/delta-kernel-rs/pull/774 +[#804]: https://github.com/delta-io/delta-kernel-rs/pull/804 +[#782]: https://github.com/delta-io/delta-kernel-rs/pull/782 +[#805]: https://github.com/delta-io/delta-kernel-rs/pull/805 +[#549]: 
https://github.com/delta-io/delta-kernel-rs/pull/549 +[#684]: https://github.com/delta-io/delta-kernel-rs/pull/684 +[#817]: https://github.com/delta-io/delta-kernel-rs/pull/817 +[#768]: https://github.com/delta-io/delta-kernel-rs/pull/768 + + +## [v0.8.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.8.0/) (2025-03-04) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.7.0...v0.8.0) + +### ๐Ÿ—๏ธ Breaking changes + +1. ffi: `get_partition_column_count` and `get_partition_columns` now take a `Snapshot` instead of a + `Scan` ([#697]) +2. ffi: expression visitor callback `visit_literal_decimal` now takes `i64` for the upper half of a 128-bit int value ([#724]) +3. - `DefaultJsonHandler::with_readahead()` renamed to `DefaultJsonHandler::with_buffer_size()` ([#711]) +4. DefaultJsonHandler's defaults changed: + - default buffer size: 10 => 1000 requests/files + - default batch size: 1024 => 1000 rows +5. Bump MSRV to rustc 1.81 ([#725]) + +### ๐Ÿ› Bug Fixes + +1. Pin `chrono` version to fix arrow compilation failure ([#719]) + +### โšก Performance + +1. Replace default engine JSON reader's `FileStream` with concurrent futures ([#711]) + + +[#719]: https://github.com/delta-io/delta-kernel-rs/pull/719 +[#724]: https://github.com/delta-io/delta-kernel-rs/pull/724 +[#697]: https://github.com/delta-io/delta-kernel-rs/pull/697 +[#725]: https://github.com/delta-io/delta-kernel-rs/pull/725 +[#711]: https://github.com/delta-io/delta-kernel-rs/pull/711 + + +## [v0.7.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.7.0/) (2025-02-24) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.1...v0.7.0) + +### ๐Ÿ—๏ธ Breaking changes +1. Read transforms are now communicated via expressions ([#607], [#612], [#613], [#614]) This includes: + - `ScanData` now includes a third tuple field: a row-indexed vector of transforms to apply to the `EngineData`. + - Adds a new `scan::state::transform_to_logical` function that encapsulates the boilerplate of applying the transform expression + - Removes `scan_action_iter` API and `logical_to_physical` API + - Removes `column_mapping_mode` from `GlobalScanState` + - ffi: exposes methods to get an expression evaluator and evaluate an expression from c + - read-table example: Removes `add_partition_columns` in arrow.c + - read-table example: adds an `apply_transform` function in arrow.c +2. ffi: support field nullability in schema visitor ([#656]) +3. ffi: expose metadata in SchemaEngineVisitor ffi api ([#659]) +4. ffi: new `visit_schema` FFI now operates on a `Schema` instead of a `Snapshot` ([#683], [#709]) +5. Introduced feature flags (`arrow_54` and `arrow_53`) to select major arrow versions ([#654], [#708], [#717]) + +### ๐Ÿš€ Features / new APIs + +1. Read `partition_values` in `RemoveVisitor` and remove `break` in `RowVisitor` for `RemoveVisitor` ([#633]) +2. Add the in-commit timestamp field to CommitInfo ([#581]) +3. Support NOT and column expressions in eval_sql_where ([#653]) +4. Add check for schema read compatibility ([#554]) +5. Introduce `TableConfiguration` to jointly manage metadata, protocol, and table properties ([#644]) +6. Add visitor `SidecarVisitor` and `Sidecar` action struct ([#673]) +7. Add in-commit timestamps table properties ([#558]) +8. Support writing to writer version 1 ([#693]) +9. ffi: new `logical_schema` FFI to get the logical schema of a snapshot ([#709]) + +### ๐Ÿ› Bug Fixes + +1. Incomplete multi-part checkpoint handling when no hint is provided ([#641]) +2. 
Consistent PartialEq for Scalar ([#677]) +3. Cargo fmt does not handle mods defined in macros ([#676]) +4. Ensure properly nested null masks for parquet reads ([#692]) +5. Handle predicates on non-nullable columns without stats ([#700]) + +### ๐Ÿ“š Documentation + +1. Update readme to reflect tracing feature is needed for read-table ([#619]) +2. Clarify `JsonHandler` semantics on EngineData ordering ([#635]) + +### ๐Ÿšœ Refactor + +1. Make [non] nullable struct fields easier to create ([#646]) +2. Make eval_sql_where available to DefaultPredicateEvaluator ([#627]) + +### ๐Ÿงช Testing + +1. Port cdf tests from delta-spark to kernel ([#611]) + +### โš™๏ธ Chores/CI + +1. Fix some typos ([#643]) +2. Release script publishing fixes ([#638]) + +[#638]: https://github.com/delta-io/delta-kernel-rs/pull/638 +[#643]: https://github.com/delta-io/delta-kernel-rs/pull/643 +[#619]: https://github.com/delta-io/delta-kernel-rs/pull/619 +[#635]: https://github.com/delta-io/delta-kernel-rs/pull/635 +[#633]: https://github.com/delta-io/delta-kernel-rs/pull/633 +[#611]: https://github.com/delta-io/delta-kernel-rs/pull/611 +[#581]: https://github.com/delta-io/delta-kernel-rs/pull/581 +[#646]: https://github.com/delta-io/delta-kernel-rs/pull/646 +[#627]: https://github.com/delta-io/delta-kernel-rs/pull/627 +[#641]: https://github.com/delta-io/delta-kernel-rs/pull/641 +[#653]: https://github.com/delta-io/delta-kernel-rs/pull/653 +[#607]: https://github.com/delta-io/delta-kernel-rs/pull/607 +[#656]: https://github.com/delta-io/delta-kernel-rs/pull/656 +[#554]: https://github.com/delta-io/delta-kernel-rs/pull/554 +[#644]: https://github.com/delta-io/delta-kernel-rs/pull/644 +[#659]: https://github.com/delta-io/delta-kernel-rs/pull/659 +[#612]: https://github.com/delta-io/delta-kernel-rs/pull/612 +[#677]: https://github.com/delta-io/delta-kernel-rs/pull/677 +[#676]: https://github.com/delta-io/delta-kernel-rs/pull/676 +[#673]: https://github.com/delta-io/delta-kernel-rs/pull/673 +[#613]: https://github.com/delta-io/delta-kernel-rs/pull/613 +[#558]: https://github.com/delta-io/delta-kernel-rs/pull/558 +[#692]: https://github.com/delta-io/delta-kernel-rs/pull/692 +[#700]: https://github.com/delta-io/delta-kernel-rs/pull/700 +[#683]: https://github.com/delta-io/delta-kernel-rs/pull/683 +[#654]: https://github.com/delta-io/delta-kernel-rs/pull/654 +[#693]: https://github.com/delta-io/delta-kernel-rs/pull/693 +[#614]: https://github.com/delta-io/delta-kernel-rs/pull/614 +[#709]: https://github.com/delta-io/delta-kernel-rs/pull/709 +[#708]: https://github.com/delta-io/delta-kernel-rs/pull/708 +[#717]: https://github.com/delta-io/delta-kernel-rs/pull/717 + + ## [v0.6.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.6.1/) (2025-01-10) [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.0...v0.6.1) diff --git a/Cargo.lock b/Cargo.lock index 57beb9412..9d30a071f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,20 +4,14 @@ version = 3 [[package]] name = "acceptance" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-ord", - "arrow-schema", - "arrow-select", "datatest-stable", "delta_kernel", "flate2", "futures", "itertools 0.13.0", "object_store", - "parquet", "serde", "serde_json", "tar", @@ -59,16 +53,16 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = 
"5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.2.15", + "getrandom 0.3.3", "once_cell", "version_check", - "zerocopy 0.7.35", + "zerocopy", ] [[package]] @@ -162,55 +156,108 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arrow" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3a3ec4fe573f9d1f59d99c085197ef669b00b088ba1d7bb75224732d9357a74" +dependencies = [ + "arrow-arith 53.4.1", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-csv 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-json 53.4.1", + "arrow-ord 53.4.1", + "arrow-row 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "arrow-string 53.4.1", +] + +[[package]] +name = "arrow" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1" +dependencies = [ + "arrow-arith 54.2.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-csv 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-json 54.2.1", + "arrow-ord 54.2.1", + "arrow-row 54.2.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", + "arrow-string 54.2.1", +] + +[[package]] +name = "arrow-arith" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +checksum = "6dcf19f07792d8c7f91086c67b574a79301e367029b17fcf63fb854332246a10" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "half", + "num", ] [[package]] name = "arrow-arith" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "num", ] [[package]] name = "arrow-array" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7845c32b41f7053e37a075b3c2f29c6f5ea1b3ca6e5df7a2d325ee6e1b4a63cf" +dependencies = [ + "ahash", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "chrono-tz", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-array" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + 
"arrow-schema 54.3.1", "chrono", "chrono-tz", "half", @@ -220,28 +267,60 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +checksum = "5b5c681a99606f3316f2a99d9c8b6fa3aad0b1d34d8f6d7a1b471893940219d8" dependencies = [ "bytes", "half", "num", ] +[[package]] +name = "arrow-buffer" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6365f8527d4f87b133eeb862f9b8093c009d41a210b8f101f91aa2392f61daac" +dependencies = [ + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + [[package]] name = "arrow-cast" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "atoi", - "base64 0.22.1", + "base64", "chrono", "comfy-table", "half", @@ -252,13 +331,32 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" +checksum = "30dac4d23ac769300349197b845e0fd18c7f9f15d260d4659ae6b5a9ca06f586" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65" +dependencies = [ + "arrow-array 54.2.1", + "arrow-cast 54.2.1", + "arrow-schema 54.3.1", "chrono", "csv", "csv-core", @@ -268,40 +366,86 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +checksum = "cd962fc3bf7f60705b25bcaa8eb3318b2545aa1d528656525ebdd6a17a6cd6fb" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 53.4.1", + "arrow-schema 53.4.1", "half", "num", ] +[[package]] +name = "arrow-data" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" +dependencies = [ + "arrow-buffer 54.3.1", + "arrow-schema 54.3.1", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3527365b24372f9c948f16e53738eb098720eea2093ae73c7af04ac5e30a39b" +dependencies = [ + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "flatbuffers", +] + [[package]] name = "arrow-ipc" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" +checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "flatbuffers", ] [[package]] name = "arrow-json" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" +checksum = "acdec0024749fc0d95e025c0b0266d78613727b3b3a5d4cf8ea47eb6d38afdd1" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-json" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "chrono", "half", "indexmap", @@ -313,64 +457,133 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +checksum = "79af2db0e62a508d34ddf4f76bfd6109b6ecc845257c9cba6f939653668f89ac" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", ] [[package]] name = "arrow-row" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +checksum = "da30e9d10e9c52f09ea0cf15086d6d785c11ae8dcc3ea5f16d402221b6ac7735" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", "half", ] +[[package]] +name = "arrow-row" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "half", +] + +[[package]] +name = "arrow-schema" +version = "53.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "35b0f9c0c3582dd55db0f136d3b44bfa0189df07adcf7dc7f2f2e74db0f52eb8" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "arrow-schema" -version = "54.2.0" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" +dependencies = [ + "bitflags 2.9.1", +] + +[[package]] +name = "arrow-select" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +checksum = "92fc337f01635218493c23da81a364daf38c694b05fc20569c3193c11c561984" dependencies = [ - "bitflags 2.8.0", + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "num", ] [[package]] name = "arrow-select" -version = "54.2.0" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", "num", ] [[package]] name = "arrow-string" -version = "54.2.0" +version = "53.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +checksum = "d596a9fc25dae556672d5069b090331aca8acb93cae426d8b7dcdf1c558fa0ce" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-data 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "arrow-string" +version = "54.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458" +dependencies = [ + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-data 54.3.1", + "arrow-schema 54.3.1", + "arrow-select 54.2.1", "memchr", "num", "regex", @@ -379,13 +592,13 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.86" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -411,9 +624,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", @@ -424,12 +637,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -459,9 +666,9 @@ checksum = 
"bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "block-buffer" @@ -494,9 +701,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "4.0.2" +version = "4.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -510,9 +717,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytemuck" -version = "1.21.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" +checksum = "9134a6ef01ce4b366b50689c94f82c14bc72bc5d0386829828a2e2752ef7958c" [[package]] name = "byteorder" @@ -522,9 +729,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "camino" @@ -543,9 +750,9 @@ dependencies = [ [[package]] name = "cbindgen" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb" +checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff" dependencies = [ "clap", "heck 0.4.1", @@ -555,16 +762,16 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.98", + "syn 2.0.101", "tempfile", "toml", ] [[package]] name = "cc" -version = "1.2.0" +version = "1.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" +checksum = "5f4ac86a9e5bc1e2b3449ab9d7d3a6a405e3d1bb28d7b9be8614f55846ae3766" dependencies = [ "jobserver", "libc", @@ -600,9 +807,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" dependencies = [ "chrono", "chrono-tz-build", @@ -611,9 +818,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" dependencies = [ "parse-zoneinfo", "phf_codegen", @@ -631,9 +838,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.22" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69371e34337c4c984bbe322360c2547210bf632eb2814bbe78a6e87a2935bd2b" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" dependencies = [ 
"clap_builder", "clap_derive", @@ -641,9 +848,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.22" +version = "4.5.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e24c1b4099818523236a8ca881d2b45db98dadfb4625cf6608c12069fcbbde1" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" dependencies = [ "anstream", "anstyle", @@ -653,14 +860,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -700,7 +907,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "once_cell", "tiny-keccak", ] @@ -742,9 +949,9 @@ dependencies = [ [[package]] name = "crc" -version = "3.2.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" dependencies = [ "crc-catalog", ] @@ -824,17 +1031,11 @@ dependencies = [ [[package]] name = "delta_kernel" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-json", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow 53.4.1", + "arrow 54.2.1", + "async-trait", "bytes", "chrono", "delta_kernel", @@ -847,7 +1048,8 @@ dependencies = [ "indexmap", "itertools 0.13.0", "object_store", - "parquet", + "parquet 53.4.1", + "parquet 54.2.1", "paste", "reqwest", "roaring", @@ -873,20 +1075,17 @@ dependencies = [ [[package]] name = "delta_kernel_derive" -version = "0.6.1" +version = "0.9.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "delta_kernel_ffi" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-data", - "arrow-schema", "cbindgen", "delta_kernel", "delta_kernel_ffi_macros", @@ -904,11 +1103,11 @@ dependencies = [ [[package]] name = "delta_kernel_ffi_macros" -version = "0.6.1" +version = "0.9.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -939,7 +1138,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -956,9 +1155,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encoding_rs" @@ -979,16 +1178,22 @@ dependencies = [ "regex", ] +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "env_logger" -version = "0.11.5" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ "anstream", "anstyle", "env_filter", - "humantime", + "jiff", "log", ] @@ -1000,9 +1205,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1033,7 +1238,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "feature_tests" -version = "0.6.1" +version = "0.9.0" dependencies = [ "delta_kernel", ] @@ -1082,9 +1287,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -1176,7 +1381,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -1211,21 +1416,21 @@ dependencies = [ [[package]] name = "g2gen" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc3e32f911a41e073b8492473c3595a043e1369ab319a2dbf8c89b1fea06457c" +checksum = "c5a7e0eb46f83a20260b850117d204366674e85d3a908d90865c78df9a6b1dfc" dependencies = [ "g2poly", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "g2p" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a9afa6efed9af3a5a68ba066429c1497c299d4eafbd948fe630df47a8f2d29f" +checksum = "539e2644c030d3bf4cd208cb842d2ce2f80e82e6e8472390bcef83ceba0d80ad" dependencies = [ "g2gen", "g2poly", @@ -1233,9 +1438,9 @@ dependencies = [ [[package]] name = "g2poly" -version = "1.1.0" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd8b261ccf00df8c5cc60c082bb7d7aa64c33a433cfcc091ca244326c924b2c" +checksum = "312d2295c7302019c395cfb90dacd00a82a2eabd700429bba9c7a3f38dbbe11b" [[package]] name = "generic-array" @@ -1249,9 +1454,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "js-sys", @@ -1262,14 +1467,16 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -1286,9 +1493,9 @@ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "h2" -version = 
"0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "a9421a676d1b147b16b82c9225157dc629087ef8ec4d5e2960f9437a90dac0a5" dependencies = [ "atomic-waker", "bytes", @@ -1305,9 +1512,9 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -1316,19 +1523,19 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] name = "hdfs-native" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e72db0dfc43c1e6b7ef6d34f6d38eff079cbd30dbc18924b27108793e47893c" +checksum = "fe9a986a98854573dfbc130f42f81e92f6d4581e23060708842fede6edef0f1f" dependencies = [ "aes", - "base64 0.21.7", - "bitflags 2.8.0", + "base64", + "bitflags 2.9.1", "bytes", "cbc", "chrono", @@ -1353,7 +1560,7 @@ dependencies = [ "regex", "roxmltree", "socket2", - "thiserror 1.0.69", + "thiserror 2.0.12", "tokio", "url", "uuid", @@ -1363,9 +1570,9 @@ dependencies = [ [[package]] name = "hdfs-native-object-store" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d441ed4b31eceee8ffc10690ac2cbcab68d64c453238fa21ec4926f0dbb217" +checksum = "2f2ed9304bed0023daf452b8017efd2f663d7e693f8b7f29a54e6c1c3d9aefd9" dependencies = [ "async-trait", "bytes", @@ -1373,7 +1580,7 @@ dependencies = [ "futures", "hdfs-native", "object_store", - "thiserror 1.0.69", + "thiserror 2.0.12", "tokio", ] @@ -1415,9 +1622,9 @@ dependencies = [ [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1436,12 +1643,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", + "futures-core", "http", "http-body", "pin-project-lite", @@ -1449,15 +1656,15 @@ dependencies = [ [[package]] name = "httparse" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "hyper" @@ -1515,9 +1722,9 @@ 
dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710" dependencies = [ "bytes", "futures-channel", @@ -1525,6 +1732,7 @@ dependencies = [ "http", "http-body", "hyper", + "libc", "pin-project-lite", "socket2", "tokio", @@ -1534,14 +1742,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -1557,21 +1766,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", "litemap", @@ -1580,31 +1790,11 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", "icu_collections", @@ -1612,67 +1802,54 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", 
"icu_properties_data", "icu_provider", - "tinystr", + "potential_utf", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" dependencies = [ "displaydoc", - "icu_locid", - "icu_provider_macros", + "icu_locale_core", "stable_deref_trait", "tinystr", "writeable", "yoke", "zerofrom", + "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "idna" version = "1.0.3" @@ -1686,9 +1863,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -1696,9 +1873,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", @@ -1706,9 +1883,9 @@ dependencies = [ [[package]] name = "inout" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ "block-padding", "generic-array", @@ -1718,8 +1895,7 @@ dependencies = [ name = "inspect-table" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -1746,34 +1922,59 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itertools" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ + "getrandom 0.3.3", "libc", ] @@ -1859,25 +2060,25 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.53.0", ] [[package]] name = "libm" -version = "0.2.11" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" @@ -1885,7 +2086,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "libc", "redox_syscall", ] @@ -1904,15 +2105,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "litemap" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" @@ -1926,9 +2127,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "lru-slab" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" @@ -1972,9 +2179,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name 
= "miniz_oxide" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -1992,9 +2199,9 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" dependencies = [ "libc", "log", @@ -2107,7 +2314,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "bytes", "chrono", "futures", @@ -2134,17 +2341,17 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", "libc", @@ -2161,7 +2368,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2172,9 +2379,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" dependencies = [ "cc", "libc", @@ -2222,19 +2429,55 @@ dependencies = [ [[package]] name = "parquet" -version = "54.2.0" +version = "53.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8cf58b29782a7add991f655ff42929e31a7859f5319e53db9e39a714cb113c" +dependencies = [ + "ahash", + "arrow-array 53.4.1", + "arrow-buffer 53.4.1", + "arrow-cast 53.4.1", + "arrow-data 53.4.1", + "arrow-ipc 53.4.1", + "arrow-schema 53.4.1", + "arrow-select 53.4.1", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parquet" +version = "54.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb" +checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.22.1", + "arrow-array 54.2.1", + "arrow-buffer 54.3.1", + "arrow-cast 54.2.1", + "arrow-data 54.3.1", + "arrow-ipc 54.2.1", + "arrow-schema 54.3.1", 
+ "arrow-select 54.2.1", + "base64", "brotli", "bytes", "chrono", @@ -2330,33 +2573,57 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.7.35", + "zerocopy", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -2364,31 +2631,31 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "prost-types" -version = "0.12.6" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] [[package]] name = "quick-xml" -version = "0.37.2" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ "memchr", "serde", @@ -2396,37 +2663,40 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum 
= "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", "rustc-hash", "rustls", "socket2", - "thiserror 2.0.11", + "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" dependencies = [ "bytes", - "getrandom 0.2.15", - "rand 0.8.5", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.1", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.11", + "thiserror 2.0.12", "tinyvec", "tracing", "web-time", @@ -2434,9 +2704,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" dependencies = [ "cfg_aliases", "libc", @@ -2448,13 +2718,19 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" @@ -2468,13 +2744,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.1", - "zerocopy 0.8.18", + "rand_core 0.9.3", ] [[package]] @@ -2494,7 +2769,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.1", + "rand_core 0.9.3", ] [[package]] @@ -2503,26 +2778,22 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] name = "rand_core" -version = "0.9.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88e0da7a2c97baa202165137c158d0a2e824ac465d13d81046727b34cb247d3" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", - "zerocopy 0.8.18", + "getrandom 0.3.3", ] [[package]] name = "read-table-changes" version = "0.1.0" dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", "clap", "delta_kernel", "env_logger", @@ -2534,7 +2805,7 @@ dependencies = [ name = "read-table-multi-threaded" version = "0.1.0" dependencies = [ - "arrow", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -2547,7 +2818,7 @@ dependencies = [ name = "read-table-single-threaded" version = "0.1.0" 
dependencies = [ - "arrow", + "arrow 53.4.1", "clap", "delta_kernel", "env_logger", @@ -2557,11 +2828,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", ] [[package]] @@ -2610,11 +2881,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.12" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-core", @@ -2661,13 +2932,13 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.9" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", @@ -2675,9 +2946,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.10" +version = "0.10.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652edd001c53df0b3f96a36a8dc93fce6866988efc16808235653c6bcac8bf2" +checksum = "19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" dependencies = [ "bytemuck", "byteorder", @@ -2685,12 +2956,9 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.18.1" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862340e351ce1b271a378ec53f304a5558f7db87f3769dc655a8f6ecbb68b302" -dependencies = [ - "xmlparser", -] +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" [[package]] name = "rustc-demangle" @@ -2715,11 +2983,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "errno", "libc", "linux-raw-sys", @@ -2728,9 +2996,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.23" +version = "0.23.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" dependencies = [ "log", "once_cell", @@ -2764,18 +3032,19 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ "web-time", + "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" dependencies = [ "ring", "rustls-pki-types", @@ -2784,15 +3053,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2824,7 +3093,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -2837,7 +3106,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.10.0", "core-foundation-sys", "libc", @@ -2856,15 +3125,15 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "seq-macro" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" @@ -2883,7 +3152,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2957,9 +3226,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "snafu" @@ -2976,10 +3245,10 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -2990,9 +3259,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -3041,7 +3310,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3063,9 +3332,9 @@ dependencies = [ 
[[package]] name = "syn" -version = "2.0.98" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -3083,13 +3352,13 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3098,7 +3367,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -3115,9 +3384,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" dependencies = [ "filetime", "libc", @@ -3126,19 +3395,18 @@ dependencies = [ [[package]] name = "target-triple" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a4d50cdb458045afc8131fd91b64904da29548bcb63c7236e0844936c13078" +checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790" [[package]] name = "tempfile" -version = "3.17.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.3", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3171,7 +3439,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3182,7 +3450,7 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "test-case-core", ] @@ -3204,19 +3472,16 @@ checksum = "888d0c3c6db53c0fdab160d2ed5e12ba745383d3e85813f2ea0f2b1475ab553f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "test_utils" -version = "0.6.1" +version = "0.9.0" dependencies = [ - "arrow-array", - "arrow-schema", "delta_kernel", "itertools 0.13.0", "object_store", - "parquet", ] [[package]] @@ -3230,11 +3495,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -3245,18 +3510,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3291,9 +3556,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", "zerovec", @@ -3301,9 +3566,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -3316,9 +3581,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.43.0" +version = "1.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" +checksum = "2513ca694ef9ede0fb23fe71a4ee4107cb102b9dc1930f6d0fd77aae068ae165" dependencies = [ "backtrace", "bytes", @@ -3338,7 +3603,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3353,9 +3618,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ "rustls", "tokio", @@ -3363,9 +3628,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -3376,9 +3641,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.20" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", @@ -3388,26 +3653,33 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.24" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name 
= "toml_write" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" + [[package]] name = "tower" version = "0.5.2" @@ -3455,7 +3727,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3518,9 +3790,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "trybuild" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b812699e0c4f813b872b373a4471717d9eb550da14b311058a4d9cf4173cbca6" +checksum = "1c9bf9513a2f4aeef5fdac8677d7d349c79fdbcc03b9c86da6e9d254f1e43be2" dependencies = [ "glob", "serde", @@ -3549,9 +3821,9 @@ checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -3577,14 +3849,14 @@ version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ - "base64 0.22.1", + "base64", "flate2", "log", "once_cell", "rustls", "rustls-pki-types", "url", - "webpki-roots", + "webpki-roots 0.26.11", ] [[package]] @@ -3598,12 +3870,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -3618,12 +3884,12 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.13.2" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ - "getrandom 0.3.1", - "rand 0.9.0", + "getrandom 0.3.3", + "rand 0.9.1", ] [[package]] @@ -3652,7 +3918,7 @@ checksum = "d674d135b4a8c1d7e813e2f8d1c9a58308aee4a680323066025e53132218bd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] @@ -3682,9 +3948,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -3717,7 +3983,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-shared", ] @@ -3752,7 +4018,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3801,30 +4067,39 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.8" +version = "0.26.11" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.0", +] + +[[package]] +name = "webpki-roots" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2853738d1cc4f2da3a225c18ec6c3721abb31961096e9dbf5ab35fa88b19cfdb" dependencies = [ "rustls-pki-types", ] [[package]] name = "which" -version = "4.4.2" +version = "7.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +checksum = "24d643ce3fd3e5b54854602a080f34fb10ab75e0b813ee32d00ca2b44fa74762" dependencies = [ "either", - "home", - "once_cell", + "env_home", "rustix", + "winsafe", ] [[package]] name = "whoami" -version = "1.5.2" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" +checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7" dependencies = [ "redox_syscall", "wasite", @@ -3853,7 +4128,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -3864,41 +4139,81 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", ] +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-registry" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", - "windows-strings", - "windows-targets 0.52.6", + "windows-strings 0.3.1", + "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - 
"windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result", - "windows-targets 0.52.6", + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", ] [[package]] @@ -3952,13 +4267,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3971,6 +4302,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3983,6 +4320,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3995,12 +4338,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -4013,6 +4368,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -4025,6 +4386,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4037,6 +4404,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -4049,58 +4422,57 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" -version = "0.7.2" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] [[package]] -name = "wit-bindgen-rt" -version = "0.33.0" +name = "winsafe" +version = "0.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" -dependencies = [ - "bitflags 2.8.0", -] +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" [[package]] -name = "write16" -version = "1.0.0" +name = "wit-bindgen-rt" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.9.1", +] [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "xattr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e105d177a3871454f754b33bb0ee637ecaaac997446375fd3e5d43a2ed00c909" +checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" dependencies = [ "libc", - "linux-raw-sys", "rustix", ] -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", @@ -4110,13 +4482,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "synstructure", ] @@ -4128,63 +4500,42 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", -] - -[[package]] -name = "zerocopy" -version = "0.8.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79386d31a42a4996e3336b0919ddb90f81112af416270cff95b5f5af22b839c2" -dependencies = [ - "zerocopy-derive 0.8.18", + "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76331675d372f91bf8d17e13afbd5fe639200b73d01f0fc748bb059f9cca2db7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", "synstructure", ] @@ -4194,11 +4545,22 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ "yoke", "zerofrom", @@ -4207,20 +4569,20 @@ dependencies = [ [[package]] name = 
"zerovec-derive" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.101", ] [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] diff --git a/Cargo.toml b/Cargo.toml index ec7993736..bcd69af76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,11 @@ members = [ "test-utils", "feature-tests", ] -# Only check / build main crates by default (check all with `--workspace`) -default-members = ["acceptance", "kernel"] +# note that in addition to the members above, the workspace includes examples: +# - inspect-table +# - read-table-changes +# - read-table-multi-threaded +# - read-table-single-threaded resolver = "2" [workspace.package] @@ -19,26 +22,11 @@ keywords = ["deltalake", "delta", "datalake"] license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" -rust-version = "1.80" -version = "0.6.1" +rust-version = "1.81" +version = "0.9.0" [workspace.dependencies] -# When changing the arrow version range, also modify ffi/Cargo.toml which has -# its own arrow version ranges witeh modified features. Failure to do so will -# result in compilation errors as two different sets of arrow dependencies may -# be sourced -arrow = { version = ">=53, <55" } -arrow-arith = { version = ">=53, <55" } -arrow-array = { version = ">=53, <55" } -arrow-buffer = { version = ">=53, <55" } -arrow-cast = { version = ">=53, <55" } -arrow-data = { version = ">=53, <55" } -arrow-ord = { version = ">=53, <55" } -arrow-json = { version = ">=53, <55" } -arrow-select = { version = ">=53, <55" } -arrow-schema = { version = ">=53, <55" } -parquet = { version = ">=53, <55", features = ["object_store"] } object_store = { version = ">=0.11, <0.12" } -hdfs-native-object-store = "0.12.0" -hdfs-native = "0.10.0" +hdfs-native-object-store = "0.13.0" +hdfs-native = "0.11.0" walkdir = "2.5.0" diff --git a/README.md b/README.md index 6e25a2ddb..d02a141e2 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default ```toml # fewer dependencies, requires consumer to implement Engine trait. # allows consumers to implement their own in-memory format -delta_kernel = "0.6.1" +delta_kernel = "0.9.0" # or turn on the default engine, based on arrow -delta_kernel = { version = "0.6.1", features = ["default-engine"] } +delta_kernel = { version = "0.9.0", features = ["default-engine"] } ``` ### Feature flags @@ -74,32 +74,19 @@ quickly. To enable engines that already integrate arrow to also integrate kernel to track a specific version of arrow that kernel depends on, we take as broad dependency on arrow versions as we can. -This means you can force kernel to rely on the specific arrow version that your engine already uses, -as long as it falls in that range. You can see the range in the `Cargo.toml` in the same folder as -this `README.md`. +We allow selecting the version of arrow to use via feature flags. 
Currently we support the following
+flags:
 
-For example, although arrow 53.1.0 has been released, you can force kernel to compile on 53.0 by
-putting the following in your project's `Cargo.toml`:
+- `arrow_53`: Use arrow version 53
+- `arrow_54`: Use arrow version 54
 
-```toml
-[patch.crates-io]
-arrow = "53.0"
-arrow-arith = "53.0"
-arrow-array = "53.0"
-arrow-buffer = "53.0"
-arrow-cast = "53.0"
-arrow-data = "53.0"
-arrow-ord = "53.0"
-arrow-json = "53.0"
-arrow-select = "53.0"
-arrow-schema = "53.0"
-parquet = "53.0"
-```
+Note that if more than one `arrow_x` feature is enabled, kernel will default to the _lowest_
+specified flag. This also means that if you use `--all-features` you will get the lowest version of
+arrow that kernel supports.
 
-Note that unfortunately patching in `cargo` requires that _exactly one_ version matches your
-specification. If only arrow "53.0.0" had been released the above will work, but if "53.0.1" where
-to be released, the specification will break and you will need to provide a more restrictive
-specification like `"=53.0.0"`.
+If no arrow feature is enabled, but at least one of `default-engine`, `sync-engine`,
+`arrow-conversion`, or `arrow-expression` is enabled, the lowest supported arrow version will be
+enabled.
 
 ### Object Store
 
 You may also need to patch the `object_store` version used if the version of `parquet` you depend on
diff --git a/acceptance/Cargo.toml b/acceptance/Cargo.toml
index 2854c7c39..e844007ef 100644
--- a/acceptance/Cargo.toml
+++ b/acceptance/Cargo.toml
@@ -14,19 +14,14 @@ rust-version.workspace = true
 release = false
 
 [dependencies]
-arrow-array = { workspace = true }
-arrow-cast = { workspace = true }
-arrow-ord = { workspace = true }
-arrow-select = { workspace = true }
-arrow-schema = { workspace = true }
 delta_kernel = { path = "../kernel", features = [
     "default-engine",
+    "arrow_53",
     "developer-visibility",
 ] }
 futures = "0.3"
 itertools = "0.13"
 object_store = { workspace = true }
-parquet = { workspace = true }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 thiserror = "1"
diff --git a/acceptance/src/data.rs b/acceptance/src/data.rs
index c515d50c9..9685f29c3 100644
--- a/acceptance/src/data.rs
+++ b/acceptance/src/data.rs
@@ -1,15 +1,18 @@
 use std::{path::Path, sync::Arc};
 
-use arrow_array::{Array, RecordBatch};
-use arrow_ord::sort::{lexsort_to_indices, SortColumn};
-use arrow_schema::{DataType, Schema};
-use arrow_select::{concat::concat_batches, filter::filter_record_batch, take::take};
+use delta_kernel::arrow::array::{Array, RecordBatch};
+use delta_kernel::arrow::compute::{
+    concat_batches, filter_record_batch, lexsort_to_indices, take, SortColumn,
+};
+use delta_kernel::arrow::datatypes::{DataType, Schema};
+use delta_kernel::parquet::arrow::async_reader::{
+    ParquetObjectReader, ParquetRecordBatchStreamBuilder,
+};
 use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Engine, Error, Table};
 use futures::{stream::TryStreamExt, StreamExt};
 use itertools::Itertools;
 use object_store::{local::LocalFileSystem, ObjectStore};
-use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder};
 
 use crate::{TestCaseInfo, TestResult};
 
@@ -83,8 +86,8 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) {
 fn normalize_col(col: Arc<dyn Array>) -> Arc<dyn Array> {
     if let DataType::Timestamp(unit, Some(zone)) = col.data_type() {
         if **zone == *"+00:00" {
-            arrow_cast::cast::cast(&col, &DataType::Timestamp(*unit, Some("UTC".into())))
-                .expect("Could not cast to UTC")
+            let data_type
= DataType::Timestamp(*unit, Some("UTC".into())); + delta_kernel::arrow::compute::cast(&col, &data_type).expect("Could not cast to UTC") } else { col } @@ -106,7 +109,10 @@ fn assert_columns_match(actual: &[Arc], expected: &[Arc]) } } -pub async fn assert_scan_data(engine: Arc, test_case: &TestCaseInfo) -> TestResult<()> { +pub async fn assert_scan_metadata( + engine: Arc, + test_case: &TestCaseInfo, +) -> TestResult<()> { let table_root = test_case.table_root()?; let table = Table::new(table_root); let snapshot = table.snapshot(engine.as_ref(), None)?; diff --git a/acceptance/src/meta.rs b/acceptance/src/meta.rs index fb8370f7e..e1c87088e 100644 --- a/acceptance/src/meta.rs +++ b/acceptance/src/meta.rs @@ -89,7 +89,7 @@ impl TestCaseInfo { let tvm = TableVersionMetaData { version: snapshot.version(), properties: metadata - .configuration + .configuration() .iter() .map(|(k, v)| (k.clone(), v.clone())) .collect(), diff --git a/acceptance/tests/dat_reader.rs b/acceptance/tests/dat_reader.rs index 622f038a9..6ba0e6d35 100644 --- a/acceptance/tests/dat_reader.rs +++ b/acceptance/tests/dat_reader.rs @@ -37,7 +37,7 @@ fn reader_test(path: &Path) -> datatest_stable::Result<()> { ); case.assert_metadata(engine.clone()).await.unwrap(); - acceptance::data::assert_scan_data(engine.clone(), &case) + acceptance::data::assert_scan_metadata(engine.clone(), &case) .await .unwrap(); }); diff --git a/acceptance/tests/other.rs b/acceptance/tests/other.rs index 5a89f23de..826cf580d 100644 --- a/acceptance/tests/other.rs +++ b/acceptance/tests/other.rs @@ -3,7 +3,7 @@ /// Since each new `.rs` file in this directory results in increased build and link time, it is /// important to only add new files if absolutely necessary for code readability or test /// performance. -use delta_kernel::snapshot::CheckpointMetadata; +use delta_kernel::snapshot::LastCheckpointHint; #[test] fn test_checkpoint_serde() { @@ -11,7 +11,7 @@ fn test_checkpoint_serde() { "./tests/dat/out/reader_tests/generated/with_checkpoint/delta/_delta_log/_last_checkpoint", ) .unwrap(); - let cp: CheckpointMetadata = serde_json::from_reader(file).unwrap(); + let cp: LastCheckpointHint = serde_json::from_reader(file).unwrap(); assert_eq!(cp.version, 2) } @@ -26,8 +26,8 @@ async fn test_read_last_checkpoint() { let store = Arc::new(LocalFileSystem::new()); let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new(store, prefix); - let cp = read_last_checkpoint(&client, &url).await.unwrap().unwrap(); + let storage = ObjectStoreStorageHandler::new(store, prefix); + let cp = read_last_checkpoint(&storage, &url).await.unwrap().unwrap(); assert_eq!(cp.version, 2); } diff --git a/feature-tests/Cargo.toml b/feature-tests/Cargo.toml index 7e45e41e2..43f3773a7 100644 --- a/feature-tests/Cargo.toml +++ b/feature-tests/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true release = false [dependencies] -delta_kernel = { path = "../kernel" } +delta_kernel = { path = "../kernel", features = ["arrow_53"] } [features] default-engine = [ "delta_kernel/default-engine" ] diff --git a/feature-tests/src/lib.rs b/feature-tests/src/lib.rs index a421d86f9..6a07429f1 100644 --- a/feature-tests/src/lib.rs +++ b/feature-tests/src/lib.rs @@ -1,7 +1,10 @@ /// This is a compilation test to ensure that the default-engine feature flags are working -/// correctly. Run (from workspace root) with: +/// correctly. +/// +/// Run (from workspace root) with: /// 1. `cargo b -p feature_tests --features default-engine-rustls` /// 2. 
`cargo b -p feature_tests --features default-engine` +/// /// These run in our build CI. pub fn test_default_engine_feature_flags() { #[cfg(any(feature = "default-engine", feature = "default-engine-rustls"))] diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index aa4edc167..7caca111b 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -24,23 +24,13 @@ url = "2" delta_kernel = { path = "../kernel", default-features = false, features = [ "developer-visibility", ] } -delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.1" } - -# used if we use the default engine to be able to move arrow data into the c-ffi format -arrow-schema = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-data = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-array = { version = ">=53, <55", default-features = false, optional = true } +delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.9.0" } [build-dependencies] -cbindgen = "0.27.0" +cbindgen = "0.28" libc = "0.2.158" [dev-dependencies] -delta_kernel = { path = "../kernel", features = ["default-engine", "sync-engine"] } object_store = { workspace = true } rand = "0.8.5" test_utils = { path = "../test-utils" } @@ -50,13 +40,8 @@ trybuild = "1.0" [features] default = ["default-engine"] cloud = ["delta_kernel/cloud"] -default-engine = [ - "delta_kernel/default-engine", - "arrow-array", - "arrow-data", - "arrow-schema", -] +default-engine = ["delta_kernel/default-engine", "delta_kernel/arrow"] tracing = [ "tracing-core", "tracing-subscriber" ] -sync-engine = ["delta_kernel/sync-engine"] +sync-engine = ["delta_kernel/sync-engine", "delta_kernel/arrow"] developer-visibility = [] test-ffi = [] diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 8fb3144f4..ca3f57251 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -25,4 +25,4 @@ parse_deps = true # only crates found in this list will ever be parsed. # # default: there is no allow-list (NOTE: this is the opposite of []) -include = ["delta_kernel", "arrow-data", "arrow-schema"] +include = ["arrow", "arrow-data", "arrow-schema", "delta_kernel"] diff --git a/ffi/examples/read-table/arrow.c b/ffi/examples/read-table/arrow.c index c6214df6b..1dabacde0 100644 --- a/ffi/examples/read-table/arrow.c +++ b/ffi/examples/read-table/arrow.c @@ -11,6 +11,7 @@ ArrowContext* init_arrow_context() context->num_batches = 0; context->batches = NULL; context->cur_filter = NULL; + context->cur_transform = NULL; return context; } @@ -50,86 +51,10 @@ static GArrowRecordBatch* get_record_batch(FFI_ArrowArray* array, GArrowSchema* return record_batch; } -// Add columns to a record batch for each partition. In a "real" engine we would want to parse the -// string values into the correct data type. 
This program just adds all partition columns as strings -// for simplicity -static GArrowRecordBatch* add_partition_columns( - GArrowRecordBatch* record_batch, - PartitionList* partition_cols, - const CStringMap* partition_values) -{ - gint64 rows = garrow_record_batch_get_n_rows(record_batch); - gint64 cols = garrow_record_batch_get_n_columns(record_batch); - GArrowRecordBatch* cur_record_batch = record_batch; - GError* error = NULL; - for (uintptr_t i = 0; i < partition_cols->len; i++) { - char* col = partition_cols->cols[i]; - guint pos = cols + i; - KernelStringSlice key = { col, strlen(col) }; - char* partition_val = get_from_string_map(partition_values, key, allocate_string); - print_diag( - " Adding partition column '%s' with value '%s' at column %u\n", - col, - partition_val ? partition_val : "NULL", - pos); - GArrowStringArrayBuilder* builder = garrow_string_array_builder_new(); - for (gint64 i = 0; i < rows; i++) { - if (partition_val) { - garrow_string_array_builder_append_string(builder, partition_val, &error); - } else { - garrow_array_builder_append_null((GArrowArrayBuilder*)builder, &error); - } - if (report_g_error("Can't append to partition column builder", error)) { - break; - } - } - - if (partition_val) { - free(partition_val); - } - - if (error != NULL) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - - GArrowArray* partition_col = garrow_array_builder_finish((GArrowArrayBuilder*)builder, &error); - if (report_g_error("Can't build string array for partition column", error)) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - g_object_unref(builder); - - GArrowDataType* string_data_type = (GArrowDataType*)garrow_string_data_type_new(); - GArrowField* field = garrow_field_new(col, string_data_type); - GArrowRecordBatch* old_batch = cur_record_batch; - cur_record_batch = garrow_record_batch_add_column(old_batch, pos, field, partition_col, &error); - g_object_unref(old_batch); - g_object_unref(partition_col); - g_object_unref(string_data_type); - g_object_unref(field); - if (cur_record_batch == NULL) { - if (error != NULL) { - printf("Could not add column at %u: %s\n", pos, error->message); - g_error_free(error); - } - } - } - return cur_record_batch; -} - // append a batch to our context static void add_batch_to_context( ArrowContext* context, - ArrowFFIData* arrow_data, - PartitionList* partition_cols, - const CStringMap* partition_values) + ArrowFFIData* arrow_data) { GArrowSchema* schema = get_schema(&arrow_data->schema); GArrowRecordBatch* record_batch = get_record_batch(&arrow_data->array, schema); @@ -142,11 +67,6 @@ static void add_batch_to_context( g_object_unref(context->cur_filter); context->cur_filter = NULL; } - record_batch = add_partition_columns(record_batch, partition_cols, partition_values); - if (record_batch == NULL) { - printf("Failed to add partition columns, not adding batch\n"); - return; - } context->batches = g_list_append(context->batches, record_batch); context->num_batches++; print_diag( @@ -187,20 +107,52 @@ static GArrowBooleanArray* slice_to_arrow_bool_array(const KernelBoolSlice slice return (GArrowBooleanArray*)ret; } +// This will apply the transform in the context to the specified data. 
This consumes the passed +// ExclusiveEngineData and return a new transformed one +static ExclusiveEngineData* apply_transform( + struct EngineContext* context, + ExclusiveEngineData* data) { + if (!context->arrow_context->cur_transform) { + print_diag(" No transform needed"); + return data; + } + print_diag(" Applying transform\n"); + SharedExpressionEvaluator* evaluator = new_expression_evaluator( + context->engine, + context->read_schema, // input schema + context->arrow_context->cur_transform, + context->logical_schema); // output schema + ExternResultHandleExclusiveEngineData transformed_res = evaluate( + context->engine, + &data, + evaluator); + free_engine_data(data); + free_expression_evaluator(evaluator); + if (transformed_res.tag != OkHandleExclusiveEngineData) { + print_error("Failed to transform read data.", (Error*)transformed_res.err); + free_error((Error*)transformed_res.err); + return NULL; + } + return transformed_res.ok; +} + // This is the callback that will be called for each chunk of data read from the parquet file static void visit_read_data(void* vcontext, ExclusiveEngineData* data) { print_diag(" Converting read data to arrow\n"); struct EngineContext* context = vcontext; - ExternResultArrowFFIData arrow_res = get_raw_arrow_data(data, context->engine); + ExclusiveEngineData* transformed = apply_transform(context, data); + if (!transformed) { + exit(-1); + } + ExternResultArrowFFIData arrow_res = get_raw_arrow_data(transformed, context->engine); if (arrow_res.tag != OkArrowFFIData) { print_error("Failed to get arrow data.", (Error*)arrow_res.err); free_error((Error*)arrow_res.err); exit(-1); } ArrowFFIData* arrow_data = arrow_res.ok; - add_batch_to_context( - context->arrow_context, arrow_data, context->partition_cols, context->partition_values); + add_batch_to_context(context->arrow_context, arrow_data); free(arrow_data); // just frees the struct, the data and schema are freed/owned by add_batch_to_context } @@ -208,7 +160,8 @@ static void visit_read_data(void* vcontext, ExclusiveEngineData* data) void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector) + const KernelBoolSlice selection_vector, + const Expression* transform) { int full_len = strlen(context->table_root) + path.len + 1; char* full_path = malloc(sizeof(char) * full_len); @@ -233,6 +186,7 @@ void c_read_parquet_file( } context->arrow_context->cur_filter = sel_array; } + context->arrow_context->cur_transform = transform; ExclusiveFileReadResultIterator* read_iter = read_res.ok; for (;;) { ExternResultbool ok_res = read_result_next(read_iter, context, visit_read_data); diff --git a/ffi/examples/read-table/arrow.h b/ffi/examples/read-table/arrow.h index 0236b238b..8f34cdd4f 100644 --- a/ffi/examples/read-table/arrow.h +++ b/ffi/examples/read-table/arrow.h @@ -15,13 +15,15 @@ typedef struct ArrowContext gsize num_batches; GList* batches; GArrowBooleanArray* cur_filter; + const Expression* cur_transform; } ArrowContext; ArrowContext* init_arrow_context(void); void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector); + const KernelBoolSlice selection_vector, + const Expression* transform); void print_arrow_context(ArrowContext* context); void free_arrow_context(ArrowContext* context); diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 704559a59..3b74355d0 100644 --- a/ffi/examples/read-table/read_table.c +++ 
b/ffi/examples/read-table/read_table.c @@ -50,6 +50,7 @@ void scan_row_callback( int64_t size, const Stats* stats, const DvInfo* dv_info, + const Expression* transform, const CStringMap* partition_values) { (void)size; // not using this at the moment @@ -76,28 +77,34 @@ void scan_row_callback( context->partition_values = partition_values; print_partition_info(context, partition_values); #ifdef PRINT_ARROW_DATA - c_read_parquet_file(context, path, selection_vector); + c_read_parquet_file(context, path, selection_vector, transform); #endif free_bool_slice(selection_vector); context->partition_values = NULL; } -// For each chunk of scan data (which may contain multiple files to scan), kernel will call this -// function (named do_visit_scan_data to avoid conflict with visit_scan_data exported by kernel) -void do_visit_scan_data( - void* engine_context, - ExclusiveEngineData* engine_data, - KernelBoolSlice selection_vec, - const CTransforms* transforms) -{ +// For each chunk of scan metadata (which may contain multiple files to scan), kernel will call this +// function (named do_visit_scan_metadata to avoid conflict with visit_scan_metadata exported by +// kernel) +void do_visit_scan_metadata(void* engine_context, HandleSharedScanMetadata scan_metadata) { print_diag("\nScan iterator found some data to read\n Of this data, here is " "a selection vector\n"); - print_selection_vector(" ", &selection_vec); + struct EngineContext* context = engine_context; + + ExternResultKernelBoolSlice selection_vector_res = + selection_vector_from_scan_metadata(scan_metadata, context->engine); + if (selection_vector_res.tag != OkKernelBoolSlice) { + printf("Could not get selection vector from kernel\n"); + exit(-1); + } + KernelBoolSlice selection_vector = selection_vector_res.ok; + print_selection_vector(" ", &selection_vector); + // Ask kernel to iterate each individual file and call us back with extracted metadata print_diag("Asking kernel to call us back for each scan row (file to read)\n"); - visit_scan_data(engine_data, selection_vec, transforms, engine_context, scan_row_callback); - free_bool_slice(selection_vec); - free_engine_data(engine_data); + visit_scan_metadata(scan_metadata, engine_context, scan_row_callback); + free_bool_slice(selection_vector); + free_scan_metadata(scan_metadata); } // Called for each element of the partition StringSliceIterator. We just turn the slice into a @@ -112,15 +119,15 @@ void visit_partition(void* context, const KernelStringSlice partition) } // Build a list of partition column names. 
-PartitionList* get_partition_list(SharedGlobalScanState* state) +PartitionList* get_partition_list(SharedSnapshot* snapshot) { print_diag("Building list of partition columns\n"); - uintptr_t count = get_partition_column_count(state); + uintptr_t count = get_partition_column_count(snapshot); PartitionList* list = malloc(sizeof(PartitionList)); // We set the `len` to 0 here and use it to track how many items we've added to the list list->len = 0; list->cols = malloc(sizeof(char*) * count); - StringSliceIterator* part_iter = get_partition_columns(state); + StringSliceIterator* part_iter = get_partition_columns(snapshot); for (;;) { bool has_next = string_slice_next(part_iter, list, visit_partition); if (!has_next) { @@ -263,6 +270,8 @@ int main(int argc, char* argv[]) char* table_root = snapshot_table_root(snapshot, allocate_string); print_diag("Table root: %s\n", table_root); + PartitionList* partition_cols = get_partition_list(snapshot); + print_diag("Starting table scan\n\n"); ExternResultHandleSharedScan scan_res = scan(snapshot, engine, NULL); @@ -273,10 +282,11 @@ int main(int argc, char* argv[]) SharedScan* scan = scan_res.ok; SharedGlobalScanState* global_state = get_global_scan_state(scan); + SharedSchema* logical_schema = get_global_logical_schema(global_state); SharedSchema* read_schema = get_global_read_schema(global_state); - PartitionList* partition_cols = get_partition_list(global_state); struct EngineContext context = { global_state, + logical_schema, read_schema, table_root, engine, @@ -287,26 +297,28 @@ int main(int argc, char* argv[]) #endif }; - ExternResultHandleSharedScanDataIterator data_iter_res = kernel_scan_data_init(engine, scan); - if (data_iter_res.tag != OkHandleSharedScanDataIterator) { - print_error("Failed to construct scan data iterator.", (Error*)data_iter_res.err); + ExternResultHandleSharedScanMetadataIterator data_iter_res = + scan_metadata_iter_init(engine, scan); + if (data_iter_res.tag != OkHandleSharedScanMetadataIterator) { + print_error("Failed to construct scan metadata iterator.", (Error*)data_iter_res.err); free_error((Error*)data_iter_res.err); return -1; } - SharedScanDataIterator* data_iter = data_iter_res.ok; + SharedScanMetadataIterator* data_iter = data_iter_res.ok; - print_diag("\nIterating scan data\n"); + print_diag("\nIterating scan metadata\n"); // iterate scan files for (;;) { - ExternResultbool ok_res = kernel_scan_data_next(data_iter, &context, do_visit_scan_data); + ExternResultbool ok_res = + scan_metadata_next(data_iter, &context, do_visit_scan_metadata); if (ok_res.tag != Okbool) { - print_error("Failed to iterate scan data.", (Error*)ok_res.err); + print_error("Failed to iterate scan metadata.", (Error*)ok_res.err); free_error((Error*)ok_res.err); return -1; } else if (!ok_res.ok) { - print_diag("Scan data iterator done\n"); + print_diag("Scan metadata iterator done\n"); break; } } @@ -319,9 +331,10 @@ int main(int argc, char* argv[]) context.arrow_context = NULL; #endif - free_kernel_scan_data(data_iter); + free_scan_metadata_iter(data_iter); free_scan(scan); - free_global_read_schema(read_schema); + free_schema(logical_schema); + free_schema(read_schema); free_global_scan_state(global_state); free_snapshot(snapshot); free_engine(engine); diff --git a/ffi/examples/read-table/read_table.h b/ffi/examples/read-table/read_table.h index 28d9c72dc..cf55863d9 100644 --- a/ffi/examples/read-table/read_table.h +++ b/ffi/examples/read-table/read_table.h @@ -14,6 +14,7 @@ typedef struct PartitionList struct EngineContext { 
SharedGlobalScanState* global_state; + SharedSchema* logical_schema; SharedSchema* read_schema; char* table_root; SharedExternEngine* engine; diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 8c29675a6..a70bd5f5a 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -273,7 +273,8 @@ void print_schema(SharedSnapshot* snapshot) .visit_timestamp = visit_timestamp, .visit_timestamp_ntz = visit_timestamp_ntz, }; - uintptr_t schema_list_id = visit_schema(snapshot, &visitor); + SharedSchema* schema = logical_schema(snapshot); + uintptr_t schema_list_id = visit_schema(schema, &visitor); #ifdef VERBOSE printf("Schema returned in list %" PRIxPTR "\n", schema_list_id); #endif @@ -281,5 +282,6 @@ void print_schema(SharedSnapshot* snapshot) printf("Schema:\n"); print_list(&builder, schema_list_id, 0, 0); printf("\n"); + free_schema(schema); free_builder(builder); } diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index eee88d1dc..f668860c5 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -87,7 +87,8 @@ struct BinaryData { uintptr_t len; }; struct Decimal { - uint64_t value[2]; + int64_t hi; + uint64_t lo; uint8_t precision; uint8_t scale; }; @@ -202,15 +203,15 @@ void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStri } void visit_expr_decimal_literal(void* data, uintptr_t sibling_list_id, - uint64_t value_ms, + int64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; struct Decimal* dec = &literal->value.decimal; - dec->value[0] = value_ms; - dec->value[1] = value_ls; + dec->hi = value_ms; + dec->lo = value_ls; dec->precision = precision; dec->scale = scale; put_expr_item(data, sibling_list_id, literal, Literal); diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index 7507c8de0..0b36c9de7 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -144,9 +144,9 @@ void print_tree_helper(ExpressionItem ref, int depth) { } case Decimal: { struct Decimal* dec = &lit->value.decimal; - printf("Decimal(%lld,%lld,%d,%d)\n", - (long long)dec->value[0], - (long long)dec->value[1], + printf("Decimal(%lld,%llu,%d,%d)\n", + (long long)dec->hi, + (unsigned long long)dec->lo, dec->precision, dec->scale); break; diff --git a/ffi/src/engine_data.rs b/ffi/src/engine_data.rs index 3363c9034..ad9b64644 100644 --- a/ffi/src/engine_data.rs +++ b/ffi/src/engine_data.rs @@ -1,9 +1,18 @@ //! 
EngineData related ffi code -use delta_kernel::{DeltaResult, EngineData}; +#[cfg(feature = "default-engine")] +use delta_kernel::arrow::array::{ + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, + ArrayData, StructArray, +}; +#[cfg(feature = "default-engine")] +use delta_kernel::DeltaResult; +use delta_kernel::EngineData; use std::ffi::c_void; -use crate::{ExclusiveEngineData, ExternResult, IntoExternResult, SharedExternEngine}; +use crate::ExclusiveEngineData; +#[cfg(feature = "default-engine")] +use crate::{ExternResult, IntoExternResult, SharedExternEngine}; use super::handle::Handle; @@ -45,8 +54,8 @@ unsafe fn get_raw_engine_data_impl(data: &mut Handle) -> &m #[cfg(feature = "default-engine")] #[repr(C)] pub struct ArrowFFIData { - pub array: arrow_data::ffi::FFI_ArrowArray, - pub schema: arrow_schema::ffi::FFI_ArrowSchema, + pub array: FFI_ArrowArray, + pub schema: FFI_ArrowSchema, } // TODO: This should use a callback to avoid having to have the engine free the struct @@ -71,16 +80,16 @@ pub unsafe extern "C" fn get_raw_arrow_data( // TODO: This method leaks the returned pointer memory. How will the engine free it? #[cfg(feature = "default-engine")] fn get_raw_arrow_data_impl(data: Box) -> DeltaResult<*mut ArrowFFIData> { - let record_batch: arrow_array::RecordBatch = data + let record_batch: delta_kernel::arrow::array::RecordBatch = data .into_any() .downcast::() .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))? .into(); - let sa: arrow_array::StructArray = record_batch.into(); - let array_data: arrow_data::ArrayData = sa.into(); + let sa: StructArray = record_batch.into(); + let array_data: ArrayData = sa.into(); // these call `clone`. is there a way to not copy anything and what exactly are they cloning? - let array = arrow_data::ffi::FFI_ArrowArray::new(&array_data); - let schema = arrow_schema::ffi::FFI_ArrowSchema::try_from(array_data.data_type())?; + let array = FFI_ArrowArray::new(&array_data); + let schema = FFI_ArrowSchema::try_from(array_data.data_type())?; let ret_data = Box::new(ArrowFFIData { array, schema }); Ok(Box::leak(ret_data)) } diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs index 1afb60510..03ae289ed 100644 --- a/ffi/src/engine_funcs.rs +++ b/ffi/src/engine_funcs.rs @@ -2,14 +2,17 @@ use std::sync::Arc; -use delta_kernel::{schema::Schema, DeltaResult, FileDataReadResultIterator}; +use delta_kernel::schema::{DataType, Schema, SchemaRef}; +use delta_kernel::{ + DeltaResult, EngineData, Expression, ExpressionEvaluator, FileDataReadResultIterator, +}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; use crate::{ - scan::SharedSchema, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, - KernelStringSlice, NullableCvoid, SharedExternEngine, TryFromStringSlice, + ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelStringSlice, + NullableCvoid, SharedExternEngine, SharedSchema, TryFromStringSlice, }; use super::handle::Handle; @@ -51,6 +54,8 @@ impl Drop for FileReadResultIterator { /// /// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by /// [`free_read_result_iter`]. The visitor function pointer must be non-null. +/// +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] pub unsafe extern "C" fn read_result_next( mut data: Handle, @@ -97,7 +102,7 @@ pub unsafe extern "C" fn free_read_result_iter(data: Handle, + engine: Handle, // TODO Does this cause a free? 
file: &FileMeta, physical_schema: Handle, ) -> ExternResult> { @@ -115,7 +120,7 @@ fn read_parquet_file_impl( physical_schema: Arc, ) -> DeltaResult> { let engine = extern_engine.engine(); - let parquet_handler = engine.get_parquet_handler(); + let parquet_handler = engine.parquet_handler(); let location = Url::parse(path?)?; let delta_fm = delta_kernel::FileMeta { location, @@ -130,3 +135,111 @@ fn read_parquet_file_impl( }); Ok(res.into()) } + +// Expression Eval + +#[handle_descriptor(target=dyn ExpressionEvaluator, mutable=false)] +pub struct SharedExpressionEvaluator; + +/// Creates a new expression evaluator as provided by the passed engines `EvaluationHandler`. +/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `Expression`, and `SharedSchema`s +#[no_mangle] +pub unsafe extern "C" fn new_expression_evaluator( + engine: Handle, + input_schema: Handle, + expression: &Expression, + // TODO: Make this a data_type, and give a way for c code to go between schema <-> datatype + output_type: Handle, +) -> Handle { + let engine = unsafe { engine.clone_as_arc() }; + let input_schema = unsafe { input_schema.clone_as_arc() }; + let output_type: DataType = output_type.as_ref().clone().into(); + new_expression_evaluator_impl(engine, input_schema, expression, output_type) +} + +fn new_expression_evaluator_impl( + extern_engine: Arc, + input_schema: SchemaRef, + expression: &Expression, + output_type: DataType, +) -> Handle { + let engine = extern_engine.engine(); + let evaluator = engine.evaluation_handler().new_expression_evaluator( + input_schema, + expression.clone(), + output_type, + ); + evaluator.into() +} + +/// Free an expression evaluator +/// # Safety +/// +/// Caller is responsible for passing a valid handle. +#[no_mangle] +pub unsafe extern "C" fn free_expression_evaluator(evaluator: Handle) { + debug!("engine released evaluator"); + evaluator.drop_handle(); +} + +/// Use the passed `evaluator` to evaluate its expression against the passed `batch` data. 
+/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `ExclusiveEngineData`, and `Evaluator` +#[no_mangle] +pub unsafe extern "C" fn evaluate( + engine: Handle, + batch: &mut Handle, + evaluator: Handle, +) -> ExternResult> { + let engine = unsafe { engine.clone_as_arc() }; + let batch = unsafe { batch.as_mut() }; + let evaluator = unsafe { evaluator.clone_as_arc() }; + let res = evaluate_impl(batch, evaluator.as_ref()); + res.into_extern_result(&engine.as_ref()) +} + +fn evaluate_impl( + batch: &dyn EngineData, + evaluator: &dyn ExpressionEvaluator, +) -> DeltaResult> { + evaluator.evaluate(batch).map(Into::into) +} + +#[cfg(test)] +mod tests { + use super::{free_expression_evaluator, new_expression_evaluator}; + use crate::{free_engine, handle::Handle, tests::get_default_engine, SharedSchema}; + use delta_kernel::{ + schema::{DataType, StructField, StructType}, + Expression, + }; + use std::sync::Arc; + + #[test] + fn test_new_evaluator() { + let engine = get_default_engine(); + let in_schema = Arc::new(StructType::new(vec![StructField::new( + "a", + DataType::LONG, + true, + )])); + let expr = Expression::literal(1); + let output_type: Handle = in_schema.clone().into(); + let in_schema_handle: Handle = in_schema.into(); + unsafe { + let evaluator = new_expression_evaluator( + engine.shallow_copy(), + in_schema_handle.shallow_copy(), + &expr, + output_type.shallow_copy(), + ); + in_schema_handle.drop_handle(); + output_type.drop_handle(); + free_engine(engine); + free_expression_evaluator(evaluator); + } + } +} diff --git a/ffi/src/error.rs b/ffi/src/error.rs index a615d0330..fd5fb87e2 100644 --- a/ffi/src/error.rs +++ b/ffi/src/error.rs @@ -52,6 +52,7 @@ pub enum KernelError { ChangeDataFeedUnsupported, ChangeDataFeedIncompatibleSchema, InvalidCheckpoint, + LiteralExpressionTransformError, } impl From for KernelError { @@ -110,6 +111,9 @@ impl From for KernelError { KernelError::ChangeDataFeedIncompatibleSchema } Error::InvalidCheckpoint(_) => KernelError::InvalidCheckpoint, + Error::LiteralExpressionTransformError(_) => { + KernelError::LiteralExpressionTransformError + } } } } diff --git a/ffi/src/expressions/engine.rs b/ffi/src/expressions/engine.rs index 2e839c50f..9492feafd 100644 --- a/ffi/src/expressions/engine.rs +++ b/ffi/src/expressions/engine.rs @@ -28,12 +28,14 @@ impl KernelExpressionVisitorState { /// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, /// along with a visitor function that can be invoked to recursively visit the predicate. This -/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// engine state must be valid until the call to [`scan::scan`] returns. Inside that method, the /// kernel allocates visitor state, which becomes the second argument to the predicate visitor /// invocation along with the engine-provided predicate pointer. The visitor state is valid for the /// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and /// kernel each retain ownership of their respective objects, with no need to coordinate memory /// lifetimes with the other. 
+/// +/// [`scan::scan`]: crate::scan::scan #[repr(C)] pub struct EnginePredicate { pub predicate: *mut c_void, @@ -45,7 +47,7 @@ fn wrap_expression(state: &mut KernelExpressionVisitorState, expr: impl Into Option { diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index a5116db47..c8ce1b2d4 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -53,8 +53,8 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi /// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed /// to visitor methods /// TODO: Visit type information in struct field and null. This will likely involve using the schema -/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant -/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is +/// the relevant issue: #[repr(C)] pub struct EngineExpressionVisitor { /// An opaque engine state pointer @@ -96,7 +96,7 @@ pub struct EngineExpressionVisitor { pub visit_literal_decimal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, - value_ms: u64, + value_ms: i64, value_ls: u64, precision: u8, scale: u8, @@ -189,6 +189,29 @@ pub struct EngineExpressionVisitor { pub unsafe extern "C" fn visit_expression( expression: &Handle, visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression.as_ref(), visitor) +} + +/// Visit the expression of the passed [`Expression`] pointer using the provided `visitor`. See the +/// documentation of [`EngineExpressionVisitor`] for a description of how this visitor works. +/// +/// This method returns the id that the engine generated for the top level expression +/// +/// # Safety +/// +/// The caller must pass a valid Expression pointer and expression visitor +#[no_mangle] +pub unsafe extern "C" fn visit_expression_ref( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression, visitor) +} + +fn visit_expression_internal( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, ) -> usize { macro_rules! call { ( $visitor:ident, $visitor_fn:ident $(, $extra_args:expr) *) => { @@ -295,14 +318,12 @@ pub unsafe extern "C" fn visit_expression( buf.len() ), Scalar::Decimal(value, precision, scale) => { - let ms: u64 = (value >> 64) as u64; - let ls: u64 = *value as u64; call!( visitor, visit_literal_decimal, sibling_list_id, - ms, - ls, + (value >> 64) as i64, + *value as u64, *precision, *scale ) @@ -367,6 +388,6 @@ pub unsafe extern "C" fn visit_expression( } } let top_level = call!(visitor, make_field_list, 1); - visit_expression_impl(visitor, expression.as_ref(), top_level); + visit_expression_impl(visitor, expression, top_level); top_level } diff --git a/ffi/src/handle.rs b/ffi/src/handle.rs index 30b695ecc..6a29cad52 100644 --- a/ffi/src/handle.rs +++ b/ffi/src/handle.rs @@ -88,14 +88,14 @@ mod private { /// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must /// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel /// API call at a time. If thread races are possible, the handle should be protected with a - /// mutex. 
Due to Rust [reference - /// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), - /// this requirement applies even for API calls that appear to be read-only (because Rust code - /// always receives the handle as mutable). + /// mutex. Due to Rust [reference rules], this requirement applies even for API calls that + /// appear to be read-only (because Rust code always receives the handle as mutable). /// /// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can /// freely access shared (non-mutable) handles. /// + /// [reference rules]: + /// https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references /// cbindgen:transparent-typedef #[repr(transparent)] pub struct Handle { diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index caf04ef2c..e24553158 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use tracing::debug; use url::Url; +use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, EngineData, Table}; use delta_kernel_ffi_macros::handle_descriptor; @@ -80,7 +81,7 @@ impl Iterator for EngineIterator { /// /// Whoever instantiates the struct must ensure it does not outlive the data it points to. The /// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is -/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning +/// to always use the `kernel_string_slice` macro to create string slices, and to avoid returning /// a string slice from a code block or function (since the move risks over-extending its lifetime): /// /// ```ignore @@ -330,7 +331,9 @@ pub unsafe extern "C" fn free_row_indexes(slice: KernelRowIndexArray) { /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] -#[handle_descriptor(target=dyn EngineData, mutable=true, sized=false)] +/// +/// [`get_raw_engine_data`]: crate::engine_data::get_raw_engine_data +#[handle_descriptor(target=dyn EngineData, mutable=true)] pub struct ExclusiveEngineData; /// Drop an `ExclusiveEngineData`. @@ -352,12 +355,14 @@ pub trait ExternEngine: Send + Sync { #[handle_descriptor(target=dyn ExternEngine, mutable=false)] pub struct SharedExternEngine; +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] struct ExternEngineVtable { // Actual engine instance to use engine: Arc, allocate_error: AllocateErrorFn, } +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl Drop for ExternEngineVtable { fn drop(&mut self) { debug!("dropping engine interface"); @@ -368,6 +373,7 @@ impl Drop for ExternEngineVtable { /// /// Kernel doesn't use any threading or concurrency. If engine chooses to do so, engine is /// responsible for handling any races that could result. +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Send for ExternEngineVtable {} /// # Safety @@ -379,8 +385,10 @@ unsafe impl Send for ExternEngineVtable {} /// Basically, by failing to implement these traits, we forbid the engine from being able to declare /// its thread-safety (because rust assumes it is not threadsafe). By implementing them, we leave it /// up to the engine to enforce thread safety if engine chooses to use threads at all. 
+#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Sync for ExternEngineVtable {} +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl ExternEngine for ExternEngineVtable { fn engine(&self) -> Arc { self.engine.clone() @@ -561,6 +569,9 @@ pub unsafe extern "C" fn free_engine(engine: Handle) { engine.drop_handle(); } +#[handle_descriptor(target=Schema, mutable=false, sized=true)] +pub struct SharedSchema; + #[handle_descriptor(target=Snapshot, mutable=false, sized=true)] pub struct SharedSnapshot; @@ -607,12 +618,32 @@ pub unsafe extern "C" fn version(snapshot: Handle) -> u64 { snapshot.version() } +/// Get the logical schema of the specified snapshot +/// +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn logical_schema(snapshot: Handle) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + snapshot.schema().into() +} + +/// Free a schema +/// +/// # Safety +/// Engine is responsible for providing a valid schema handle. +#[no_mangle] +pub unsafe extern "C" fn free_schema(schema: Handle) { + schema.drop_handle(); +} + /// Get the resolved root of the table. This should be used in any future calls that require /// constructing a path /// /// # Safety /// -/// Caller is responsible for passing a valid handle. +/// Caller is responsible for passing a valid snapshot handle. #[no_mangle] pub unsafe extern "C" fn snapshot_table_root( snapshot: Handle, @@ -623,6 +654,30 @@ pub unsafe extern "C" fn snapshot_table_root( allocate_fn(kernel_string_slice!(table_root)) } +/// Get a count of the number of partition columns for this snapshot +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle +#[no_mangle] +pub unsafe extern "C" fn get_partition_column_count(snapshot: Handle) -> usize { + let snapshot = unsafe { snapshot.as_ref() }; + snapshot.metadata().partition_columns().len() +} + +/// Get an iterator of the list of partition columns for this snapshot. +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn get_partition_columns( + snapshot: Handle, +) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + let iter: Box = + Box::new(snapshot.metadata().partition_columns().clone().into_iter()); + iter.into() +} + type StringIter = dyn Iterator + Send; #[handle_descriptor(target=StringIter, mutable=true, sized=false)] @@ -630,8 +685,11 @@ pub struct StringSliceIterator; /// # Safety /// -/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by -/// [kernel_scan_data_free]. The visitor function pointer must be non-null. +/// The iterator must be valid (returned by [`scan_metadata_iter_init`]) and not yet freed by +/// [`free_scan_metadata_iter`]. The visitor function pointer must be non-null. 
+/// +/// [`scan_metadata_iter_init`]: crate::scan::scan_metadata_iter_init +/// [`free_scan_metadata_iter`]: crate::scan::free_scan_metadata_iter #[no_mangle] pub unsafe extern "C" fn string_slice_next( data: Handle, @@ -718,8 +776,8 @@ impl Default for ReferenceSet { #[cfg(test)] mod tests { use delta_kernel::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; - use object_store::{memory::InMemory, path::Path}; - use test_utils::{actions_to_string, add_commit, TestAction}; + use object_store::memory::InMemory; + use test_utils::{actions_to_string, actions_to_string_partitioned, add_commit, TestAction}; use super::*; use crate::error::{EngineError, KernelError}; @@ -768,7 +826,7 @@ mod tests { } } - fn get_default_engine() -> Handle { + pub(crate) fn get_default_engine() -> Handle { let path = "memory:///doesntmatter/foo"; let path = kernel_string_slice!(path); let builder = unsafe { ok_or_panic(get_engine_builder(path, allocate_err)) }; @@ -792,11 +850,7 @@ mod tests { actions_to_string(vec![TestAction::Metadata]), ) .await?; - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let engine = engine_to_handle(Arc::new(engine), allocate_err); let path = "memory:///"; @@ -816,6 +870,42 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_snapshot_partition_cols() -> Result<(), Box> { + let storage = Arc::new(InMemory::new()); + add_commit( + storage.as_ref(), + 0, + actions_to_string_partitioned(vec![TestAction::Metadata]), + ) + .await?; + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); + let engine = engine_to_handle(Arc::new(engine), allocate_err); + let path = "memory:///"; + + let snapshot = + unsafe { ok_or_panic(snapshot(kernel_string_slice!(path), engine.shallow_copy())) }; + + let partition_count = unsafe { get_partition_column_count(snapshot.shallow_copy()) }; + assert_eq!(partition_count, 1, "Should have one partition"); + + let partition_iter = unsafe { get_partition_columns(snapshot.shallow_copy()) }; + + #[no_mangle] + extern "C" fn visit_partition(_context: NullableCvoid, slice: KernelStringSlice) { + let s = unsafe { String::try_from_slice(&slice) }.unwrap(); + assert_eq!(s.as_str(), "val", "Partition col should be 'val'"); + } + while unsafe { string_slice_next(partition_iter.shallow_copy(), None, visit_partition) } { + // validate happens inside visit_partition + } + + unsafe { free_string_slice_data(partition_iter) } + unsafe { free_snapshot(snapshot) } + unsafe { free_engine(engine) } + Ok(()) + } + #[test] #[cfg(feature = "sync-engine")] fn sync_engine() { diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 73f691010..1a797566b 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -3,11 +3,10 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; -use delta_kernel::scan::state::{visit_scan_files, DvInfo, GlobalScanState}; -use delta_kernel::scan::{Scan, ScanData}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::state::{DvInfo, GlobalScanState}; +use delta_kernel::scan::{Scan, ScanMetadata}; use delta_kernel::snapshot::Snapshot; -use delta_kernel::{DeltaResult, Error, ExpressionRef}; +use delta_kernel::{DeltaResult, Error, Expression, ExpressionRef}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; @@ -15,23 +14,57 @@ use url::Url; use crate::expressions::engine::{ 
unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, }; +use crate::expressions::SharedExpression; use crate::{ - kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, - IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, - SharedExternEngine, SharedSnapshot, StringIter, StringSliceIterator, TryFromStringSlice, + kernel_string_slice, AllocateStringFn, ExternEngine, ExternResult, IntoExternResult, + KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, SharedExternEngine, + SharedSchema, SharedSnapshot, TryFromStringSlice, }; use super::handle::Handle; // TODO: Why do we even need to expose a scan, when the only thing an engine can do with it is -// handit back to the kernel by calling `kernel_scan_data_init`? There isn't even an FFI method to +// handit back to the kernel by calling `scan_metadata_iter_init`? There isn't even an FFI method to // drop it! #[handle_descriptor(target=Scan, mutable=false, sized=true)] pub struct SharedScan; +#[handle_descriptor(target=ScanMetadata, mutable=false, sized=true)] +pub struct SharedScanMetadata; + +/// Drop a `SharedScanMetadata`. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid scan data handle. +#[no_mangle] +pub unsafe extern "C" fn free_scan_metadata(scan_metadata: Handle) { + scan_metadata.drop_handle(); +} + +/// Get a selection vector out of a [`SharedScanMetadata`] struct +/// +/// # Safety +/// Engine is responsible for providing valid pointers for each argument +#[no_mangle] +pub unsafe extern "C" fn selection_vector_from_scan_metadata( + scan_metadata: Handle, + engine: Handle, +) -> ExternResult { + let scan_metadata = unsafe { scan_metadata.as_ref() }; + selection_vector_from_scan_metadata_impl(scan_metadata).into_extern_result(&engine.as_ref()) +} + +fn selection_vector_from_scan_metadata_impl( + scan_metadata: &ScanMetadata, +) -> DeltaResult { + Ok(scan_metadata.scan_files.selection_vector.clone().into()) +} + /// Drops a scan. +/// /// # Safety -/// Caller is responsible for passing a [valid][Handle#Validity] scan handle. +/// Caller is responsible for passing a valid scan handle. #[no_mangle] pub unsafe extern "C" fn free_scan(scan: Handle) { scan.drop_handle(); @@ -70,8 +103,6 @@ fn scan_impl( #[handle_descriptor(target=GlobalScanState, mutable=false, sized=true)] pub struct SharedGlobalScanState; -#[handle_descriptor(target=Schema, mutable=false, sized=true)] -pub struct SharedSchema; /// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`] /// for more information. @@ -99,36 +130,17 @@ pub unsafe extern "C" fn get_global_read_schema( state.physical_schema.clone().into() } -/// Free a global read schema -/// -/// # Safety -/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`] -#[no_mangle] -pub unsafe extern "C" fn free_global_read_schema(schema: Handle) { - schema.drop_handle(); -} - -/// Get a count of the number of partition columns for this scan -/// -/// # Safety -/// Caller is responsible for passing a valid global scan pointer. -#[no_mangle] -pub unsafe extern "C" fn get_partition_column_count(state: Handle) -> usize { - let state = unsafe { state.as_ref() }; - state.partition_columns.len() -} - -/// Get an iterator of the list of partition columns for this scan. 
+/// Get the kernel view of the physical read schema that an engine should read from parquet file in +/// a scan /// /// # Safety -/// Caller is responsible for passing a valid global scan pointer. +/// Engine is responsible for providing a valid GlobalScanState pointer #[no_mangle] -pub unsafe extern "C" fn get_partition_columns( +pub unsafe extern "C" fn get_global_logical_schema( state: Handle, -) -> Handle { +) -> Handle { let state = unsafe { state.as_ref() }; - let iter: Box = Box::new(state.partition_columns.clone().into_iter()); - iter.into() + state.logical_schema.clone().into() } /// # Safety @@ -145,11 +157,11 @@ pub unsafe extern "C" fn free_global_scan_state(state: Handle Allow the iterator to be accessed safely by multiple threads. // Box -> Wrap its unsized content this struct is fixed-size with thin pointers. - // Item = DeltaResult - data: Mutex> + Send>>, + // Item = DeltaResult + data: Mutex> + Send>>, // Also keep a reference to the external engine for its error allocator. The default Parquet and // Json handlers don't hold any reference to the tokio reactor they rely on, so the iterator @@ -157,85 +169,83 @@ pub struct KernelScanDataIterator { engine: Arc, } -#[handle_descriptor(target=KernelScanDataIterator, mutable=false, sized=true)] -pub struct SharedScanDataIterator; +#[handle_descriptor(target=ScanMetadataIterator, mutable=false, sized=true)] +pub struct SharedScanMetadataIterator; -impl Drop for KernelScanDataIterator { +impl Drop for ScanMetadataIterator { fn drop(&mut self) { - debug!("dropping KernelScanDataIterator"); + debug!("dropping ScanMetadataIterator"); } } /// Get an iterator over the data needed to perform a scan. This will return a -/// [`KernelScanDataIterator`] which can be passed to [`kernel_scan_data_next`] to get the actual -/// data in the iterator. +/// [`ScanMetadataIterator`] which can be passed to [`scan_metadata_next`] to get the +/// actual data in the iterator. /// /// # Safety /// /// Engine is responsible for passing a valid [`SharedExternEngine`] and [`SharedScan`] #[no_mangle] -pub unsafe extern "C" fn kernel_scan_data_init( +pub unsafe extern "C" fn scan_metadata_iter_init( engine: Handle, scan: Handle, -) -> ExternResult> { +) -> ExternResult> { let engine = unsafe { engine.clone_as_arc() }; let scan = unsafe { scan.as_ref() }; - kernel_scan_data_init_impl(&engine, scan).into_extern_result(&engine.as_ref()) + scan_metadata_iter_init_impl(&engine, scan).into_extern_result(&engine.as_ref()) } -fn kernel_scan_data_init_impl( +fn scan_metadata_iter_init_impl( engine: &Arc, scan: &Scan, -) -> DeltaResult> { - let scan_data = scan.scan_data(engine.engine().as_ref())?; - let data = KernelScanDataIterator { - data: Mutex::new(Box::new(scan_data)), +) -> DeltaResult> { + let scan_metadata = scan.scan_metadata(engine.engine().as_ref())?; + let data = ScanMetadataIterator { + data: Mutex::new(Box::new(scan_metadata)), engine: engine.clone(), }; Ok(Arc::new(data).into()) } -/// Call the provided `engine_visitor` on the next scan data item. The visitor will be provided with -/// a selection vector and engine data. It is the responsibility of the _engine_ to free these when -/// it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively. +/// Call the provided `engine_visitor` on the next scan metadata item. The visitor will be provided with +/// a [`SharedScanMetadata`], which contains the actual scan files and the associated selection vector. 
It is the +/// responsibility of the _engine_ to free the associated resources after use by calling +/// [`free_engine_data`] and [`free_bool_slice`] respectively. /// /// # Safety /// -/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by -/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null. +/// The iterator must be valid (returned by [scan_metadata_iter_init]) and not yet freed by +/// [`free_scan_metadata_iter`]. The visitor function pointer must be non-null. +/// +/// [`free_bool_slice`]: crate::free_bool_slice +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] -pub unsafe extern "C" fn kernel_scan_data_next( - data: Handle, +pub unsafe extern "C" fn scan_metadata_next( + data: Handle, engine_context: NullableCvoid, engine_visitor: extern "C" fn( engine_context: NullableCvoid, - engine_data: Handle, - selection_vector: KernelBoolSlice, - transforms: &CTransforms, + scan_metadata: Handle, ), ) -> ExternResult { let data = unsafe { data.as_ref() }; - kernel_scan_data_next_impl(data, engine_context, engine_visitor) + scan_metadata_next_impl(data, engine_context, engine_visitor) .into_extern_result(&data.engine.as_ref()) } -fn kernel_scan_data_next_impl( - data: &KernelScanDataIterator, +fn scan_metadata_next_impl( + data: &ScanMetadataIterator, engine_context: NullableCvoid, engine_visitor: extern "C" fn( engine_context: NullableCvoid, - engine_data: Handle, - selection_vector: KernelBoolSlice, - transforms: &CTransforms, + scan_metadata: Handle, ), ) -> DeltaResult { let mut data = data .data .lock() .map_err(|_| Error::generic("poisoned mutex"))?; - if let Some((data, sel_vec, transforms)) = data.next().transpose()? { - let bool_slice = KernelBoolSlice::from(sel_vec); - let transform_map = CTransforms { transforms }; - (engine_visitor)(engine_context, data.into(), bool_slice, &transform_map); + if let Some(scan_metadata) = data.next().transpose()? { + (engine_visitor)(engine_context, Arc::new(scan_metadata).into()); Ok(true) } else { Ok(false) @@ -245,11 +255,11 @@ fn kernel_scan_data_next_impl( /// # Safety /// /// Caller is responsible for (at most once) passing a valid pointer returned by a call to -/// [`kernel_scan_data_init`]. +/// [`scan_metadata_iter_init`]. // we should probably be consistent with drop vs. free on engine side (probably the latter is more // intuitive to non-rust code) #[no_mangle] -pub unsafe extern "C" fn free_kernel_scan_data(data: Handle) { +pub unsafe extern "C" fn free_scan_metadata_iter(data: Handle) { data.drop_handle(); } @@ -263,12 +273,23 @@ pub struct Stats { pub num_records: u64, } +/// This callback will be invoked for each valid file that needs to be read for a scan. +/// +/// The arguments to the callback are: +/// * `context`: a `void*` context this can be anything that engine needs to pass through to each call +/// * `path`: a `KernelStringSlice` which is the path to the file +/// * `size`: an `i64` which is the size of the file +/// * `dv_info`: a [`DvInfo`] struct, which allows getting the selection vector for this file +/// * `transform`: An optional expression that, if not `NULL`, _must_ be applied to physical data to +/// convert it to the correct logical format. If this is `NULL`, no transform is needed. 
+/// * `partition_values`: [DEPRECATED] a `HashMap` which are partition values type CScanCallback = extern "C" fn( engine_context: NullableCvoid, path: KernelStringSlice, size: i64, stats: Option<&Stats>, dv_info: &DvInfo, + transform: Option<&Expression>, partition_map: &CStringMap, ); @@ -303,10 +324,40 @@ pub unsafe extern "C" fn get_from_string_map( .and_then(|v| allocate_fn(kernel_string_slice!(v))) } +/// Transformation expressions that need to be applied to each row `i` in ScanMetadata. You can use +/// [`get_transform_for_row`] to get the transform for a particular row. If that returns an +/// associated expression, it _must_ be applied to the data read from the file specified by the +/// row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If +/// `get_transform_for_row` returns `NULL` no expression need be applied and the data read from disk +/// is already in the correct logical state. +/// +/// NB: If you are using `visit_scan_metadata` you don't need to worry about dealing with probing +/// `CTransforms`. The callback will be invoked with the correct transform for you. pub struct CTransforms { transforms: Vec>, } +#[no_mangle] +/// Allow getting the transform for a particular row. If the requested row is outside the range of +/// the passed `CTransforms` returns `NULL`, otherwise returns the element at the index of the +/// specified row. See also [`CTransforms`] above. +/// +/// # Safety +/// +/// The engine is responsible for providing a valid [`CTransforms`] pointer, and for checking if the +/// return value is `NULL` or not. +pub unsafe extern "C" fn get_transform_for_row( + row: usize, + transforms: &CTransforms, +) -> Option> { + transforms + .transforms + .get(row) + .cloned() + .flatten() + .map(Into::into) +} + /// Get a selection vector out of a [`DvInfo`] struct /// /// # Safety @@ -369,9 +420,10 @@ fn rust_callback( size: i64, kernel_stats: Option, dv_info: DvInfo, - _transform: Option, + transform: Option, partition_values: HashMap, ) { + let transform = transform.map(|e| e.as_ref().clone()); let partition_map = CStringMap { values: partition_values, }; @@ -384,6 +436,7 @@ fn rust_callback( size, stats.as_ref(), &dv_info, + transform.as_ref(), &partition_map, ); } @@ -394,32 +447,25 @@ struct ContextWrapper { callback: CScanCallback, } -/// Shim for ffi to call visit_scan_data. This will generally be called when iterating through scan -/// data which provides the data handle and selection vector as each element in the iterator. +/// Shim for ffi to call visit_scan_metadata. This will generally be called when iterating through scan +/// data which provides the [`SharedScanMetadata`] as each element in the iterator. /// /// # Safety -/// engine is responsible for passing a valid [`ExclusiveEngineData`] and selection vector. +/// engine is responsible for passing a valid [`SharedScanMetadata`]. #[no_mangle] -pub unsafe extern "C" fn visit_scan_data( - data: Handle, - selection_vec: KernelBoolSlice, - transforms: &CTransforms, +pub unsafe extern "C" fn visit_scan_metadata( + scan_metadata: Handle, engine_context: NullableCvoid, callback: CScanCallback, ) { - let selection_vec = unsafe { selection_vec.as_ref() }; - let data = unsafe { data.as_ref() }; + let scan_metadata = unsafe { scan_metadata.as_ref() }; let context_wrapper = ContextWrapper { engine_context, callback, }; + // TODO: return ExternResult to caller instead of panicking? 
- visit_scan_files( - data, - selection_vec, - &transforms.transforms, - context_wrapper, - rust_callback, - ) - .unwrap(); + scan_metadata + .visit_scan_files(context_wrapper, rust_callback) + .unwrap(); } diff --git a/ffi/src/schema.rs b/ffi/src/schema.rs index f033ac8d9..a474c80c3 100644 --- a/ffi/src/schema.rs +++ b/ffi/src/schema.rs @@ -1,7 +1,8 @@ use std::os::raw::c_void; -use crate::scan::{CStringMap, SharedSchema}; -use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot}; +use crate::handle::Handle; +use crate::scan::CStringMap; +use crate::{kernel_string_slice, KernelStringSlice, SharedSchema}; use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own @@ -192,23 +193,6 @@ pub struct EngineSchemaVisitor { ), } -/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the -/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. -/// -/// This method returns the id of the list allocated to hold the top level schema columns. -/// -/// # Safety -/// -/// Caller is responsible for passing a valid snapshot handle and schema visitor. -#[no_mangle] -pub unsafe extern "C" fn visit_snapshot_schema( - snapshot: Handle, - visitor: &mut EngineSchemaVisitor, -) -> usize { - let snapshot = unsafe { snapshot.as_ref() }; - visit_schema_impl(snapshot.schema(), visitor) -} - /// Visit the given `schema` using the provided `visitor`. See the documentation of /// [`EngineSchemaVisitor`] for a description of how this visitor works. /// diff --git a/ffi/tests/invalid-handle-code/private-constructor.stderr b/ffi/tests/invalid-handle-code/private-constructor.stderr index b6d9c5e07..14f35a5bf 100644 --- a/ffi/tests/invalid-handle-code/private-constructor.stderr +++ b/ffi/tests/invalid-handle-code/private-constructor.stderr @@ -2,4 +2,4 @@ error[E0451]: field `ptr` of struct `Handle` is private --> tests/invalid-handle-code/private-constructor.rs:10:41 | 10 | let _: Handle = Handle { ptr: std::ptr::NonNull::dangling() }; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ private field + | ^^^ private field diff --git a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected index 4a062b104..324ef0086 100644 --- a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected +++ b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected @@ -6,6 +6,14 @@ Schema: โ”œโ”€ number: long โ””โ”€ a_float: double +letter: [ + "a", + "e", + "f", + "a", + "b", + "c" +] number: [ 4, 5, @@ -22,11 +30,3 @@ a_float: [ 2.2, 3.3 ] -letter: [ - "a", - "e", - "f", - "a", - "b", - "c" -] diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index cc0a5abd1..02e924260 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -6,19 +6,4 @@ edition = "2021" [workspace] [dependencies] -arrow = "=53.0.0" -delta_kernel = { path = "../kernel", features = ["arrow-conversion", "arrow-expression", "default-engine", "sync-engine"] } - -[patch.'file:///../kernel'] -arrow = "=53.0.0" -arrow-arith = "=53.0.0" -arrow-array = "=53.0.0" -arrow-buffer = "=53.0.0" -arrow-cast = "=53.0.0" -arrow-data = "=53.0.0" -arrow-ord = "=53.0.0" -arrow-json = "=53.0.0" -arrow-select = "=53.0.0" -arrow-schema = "=53.0.0" -parquet = "=53.0.0" -object_store = "=0.11.1" +delta_kernel = { path = "../kernel", features = 
["default-engine", "sync-engine"] } diff --git a/integration-tests/src/main.rs b/integration-tests/src/main.rs index 55a809e8c..db26d0e4d 100644 --- a/integration-tests/src/main.rs +++ b/integration-tests/src/main.rs @@ -1,15 +1,16 @@ -fn create_arrow_schema() -> arrow::datatypes::Schema { - use arrow::datatypes::{DataType, Field, Schema}; +use delta_kernel::arrow::datatypes::{DataType, Field, Schema}; + +fn create_arrow_schema() -> Schema { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Boolean, false); Schema::new(vec![field_a, field_b]) } fn create_kernel_schema() -> delta_kernel::schema::Schema { - use delta_kernel::schema::{DataType, Schema, StructField}; + use delta_kernel::schema::{DataType, StructField}; let field_a = StructField::not_null("a", DataType::LONG); let field_b = StructField::not_null("b", DataType::BOOLEAN); - Schema::new(vec![field_a, field_b]) + delta_kernel::schema::Schema::new(vec![field_a, field_b]) } fn main() { diff --git a/integration-tests/test-all-arrow-versions.sh b/integration-tests/test-all-arrow-versions.sh index 35c8fdc7d..e4207a56e 100755 --- a/integration-tests/test-all-arrow-versions.sh +++ b/integration-tests/test-all-arrow-versions.sh @@ -2,38 +2,43 @@ set -eu -o pipefail -is_version_le() { - [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] +clean_up () { + CODE=$? + git checkout HEAD Cargo.toml + exit $CODE } -is_version_lt() { - if [ "$1" = "$2" ] - then - return 1 - else - is_version_le "$1" "$2" - fi -} +# ensure we checkout the clean version of Cargo.toml no matter how we exit +trap clean_up EXIT test_arrow_version() { ARROW_VERSION="$1" echo "== Testing version $ARROW_VERSION ==" - sed -i'' -e "s/\(arrow[^\"]*=[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml - sed -i'' -e "s/\(parquet[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml cargo clean rm -f Cargo.lock cargo update + echo "Cargo.toml is:" cat Cargo.toml - cargo run + echo "" + if [ "$ARROW_VERSION" = "ALL_ENABLED" ]; then + echo "testing with --all-features" + cargo run --all-features + else + echo "testing with --features ${ARROW_VERSION}" + cargo run --features ${ARROW_VERSION} + fi } -MIN_ARROW_VER="53.0.0" -MAX_ARROW_VER="54.0.0" +FEATURES=$(cat ../kernel/Cargo.toml | grep -e ^arrow_ | awk '{ print $1 }' | sort -u) -for ARROW_VERSION in $(curl -s https://crates.io/api/v1/crates/arrow | jq -r '.versions[].num' | tr -d '\r') + +echo "[features]" >> Cargo.toml + +for ARROW_VERSION in ${FEATURES} do - if ! 
is_version_lt "$ARROW_VERSION" "$MIN_ARROW_VER" && is_version_lt "$ARROW_VERSION" "$MAX_ARROW_VER" - then - test_arrow_version "$ARROW_VERSION" - fi + echo "${ARROW_VERSION} = [\"delta_kernel/${ARROW_VERSION}\"]" >> Cargo.toml + test_arrow_version $ARROW_VERSION done + +test_arrow_version "ALL_ENABLED" + diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 1431b1ff1..323aee250 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -30,13 +30,15 @@ pre-release-hook = [ "--unreleased", "--prepend", "../CHANGELOG.md", + "--include-path", + "*", "--tag", "{{version}}", ] [dependencies] bytes = "1.7" -chrono = { version = "0.4" } +chrono = "=0.4.39" fix-hidden-lifetime-bug = "0.2" indexmap = "2.5.0" itertools = "0.13" @@ -47,31 +49,33 @@ thiserror = "1" # only for structured logging tracing = { version = "0.1", features = ["log"] } url = "2" -uuid = "1.10.0" +uuid = { version = "1.10.0", features = ["v4", "fast-rng"] } z85 = "3.0.5" # bring in our derive macros -delta_kernel_derive = { path = "../derive-macros", version = "0.6.1" } +delta_kernel_derive = { path = "../derive-macros", version = "0.9.0" } # used for developer-visibility visibility = "0.1.1" # Used in the sync engine tempfile = { version = "3", optional = true } + +# Arrow supported versions +## 53 # Used in default engine -arrow-buffer = { workspace = true, optional = true } -arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] } -arrow-select = { workspace = true, optional = true } -arrow-arith = { workspace = true, optional = true } -arrow-cast = { workspace = true, optional = true } -arrow-json = { workspace = true, optional = true } -arrow-ord = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } +arrow_53 = { package = "arrow", version = "53", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +# Used in default and sync engine +parquet_53 = { package = "parquet", version = "53", features = ["async", "object_store"] , optional = true } +###### +## 54 +arrow_54 = { package = "arrow", version = "54", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +parquet_54 = { package = "parquet", version = "54", features = ["async", "object_store"] , optional = true } +###### + futures = { version = "0.3", optional = true } object_store = { workspace = true, optional = true } hdfs-native-object-store = { workspace = true, optional = true } -# Used in default and sync engine -parquet = { workspace = true, optional = true } # Used for fetching direct urls (like pre-signed urls) reqwest = { version = "0.12.8", default-features = false, optional = true } strum = { version = "0.26", features = ["derive"] } @@ -85,14 +89,17 @@ hdfs-native = { workspace = true, optional = true } walkdir = { workspace = true, optional = true } [features] -arrow-conversion = ["arrow-schema"] -arrow-expression = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", -] +# The default version to be expected +arrow = ["arrow_53"] + +arrow_53 = ["dep:arrow_53", "dep:parquet_53"] + +arrow_54 = ["dep:arrow_54", "dep:parquet_54"] + +need_arrow = [] +arrow-conversion = ["need_arrow"] +arrow-expression = ["need_arrow"] + cloud = [ "object_store/aws", "object_store/azure", @@ -107,19 +114,10 @@ default = [] default-engine-base = [ "arrow-conversion", "arrow-expression", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-json", - "arrow-schema", - "arrow-select", "futures", + "need_arrow", "object_store", - 
"parquet/async", - "parquet/object_store", "tokio", - "uuid/v4", - "uuid/fast-rng", ] # the default-engine use the reqwest crate with default features which uses native-tls. if you want @@ -134,13 +132,7 @@ default-engine-rustls = [ developer-visibility = [] sync-engine = [ - "arrow-cast", - "arrow-conversion", - "arrow-expression", - "arrow-array", - "arrow-json", - "arrow-select", - "parquet", + "need_arrow", "tempfile", ] integration-test = [ @@ -156,9 +148,9 @@ version = "=0.5.9" rustc_version = "0.4.1" [dev-dependencies] -arrow = { workspace = true, features = ["json", "prettyprint"] } -delta_kernel = { path = ".", features = ["default-engine", "sync-engine"] } +delta_kernel = { path = ".", features = ["arrow", "default-engine", "sync-engine"] } test_utils = { path = "../test-utils" } +async-trait = "0.1" # only used for our custom SlowGetStore ObjectStore implementation paste = "1.0" test-log = { version = "0.2", default-features = false, features = ["trace"] } tempfile = "3" diff --git a/kernel/examples/inspect-table/Cargo.toml b/kernel/examples/inspect-table/Cargo.toml index b81a8ac5b..4208c6938 100644 --- a/kernel/examples/inspect-table/Cargo.toml +++ b/kernel/examples/inspect-table/Cargo.toml @@ -5,11 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } +arrow = "53" clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "developer-visibility", ] } diff --git a/kernel/examples/inspect-table/src/main.rs b/kernel/examples/inspect-table/src/main.rs index f5145905e..fab6a412b 100644 --- a/kernel/examples/inspect-table/src/main.rs +++ b/kernel/examples/inspect-table/src/main.rs @@ -41,7 +41,7 @@ enum Commands { /// Show the table's schema Schema, /// Show the meta-data that would be used to scan the table - ScanData, + ScanMetadata, /// Show each action from the log-segments Actions { /// Show the log in reverse order (default is log replay order -- newest first) @@ -207,23 +207,17 @@ fn try_main() -> DeltaResult<()> { Commands::Schema => { println!("{:#?}", snapshot.schema()); } - Commands::ScanData => { + Commands::ScanMetadata => { let scan = ScanBuilder::new(snapshot).build()?; - let scan_data = scan.scan_data(&engine)?; - for res in scan_data { - let (data, vector, transforms) = res?; - delta_kernel::scan::state::visit_scan_files( - data.as_ref(), - &vector, - &transforms, - (), - print_scan_file, - )?; + let scan_metadata_iter = scan.scan_metadata(&engine)?; + for res in scan_metadata_iter { + let scan_metadata = res?; + scan_metadata.visit_scan_files((), print_scan_file)?; } } Commands::Actions { oldest_first } => { let log_schema = get_log_schema(); - let actions = snapshot.log_segment().replay( + let actions = snapshot.log_segment().read_actions( &engine, log_schema.clone(), log_schema.clone(), diff --git a/kernel/examples/read-table-changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml index 181da7dc6..35f077bc2 100644 --- a/kernel/examples/read-table-changes/Cargo.toml +++ b/kernel/examples/read-table-changes/Cargo.toml @@ -8,14 +8,12 @@ publish = false release = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow", "default-engine", ] } env_logger = "0.11.3" url = "2" itertools = "0.13" -arrow = { workspace = true, features = 
["prettyprint"] } diff --git a/kernel/examples/read-table-changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs index 3360a06cf..ddafc1554 100644 --- a/kernel/examples/read-table-changes/src/main.rs +++ b/kernel/examples/read-table-changes/src/main.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, sync::Arc}; -use arrow::{compute::filter_record_batch, util::pretty::print_batches}; -use arrow_array::RecordBatch; use clap::Parser; +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::arrow::{compute::filter_record_batch, util::pretty::print_batches}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; diff --git a/kernel/examples/read-table-multi-threaded/Cargo.toml b/kernel/examples/read-table-multi-threaded/Cargo.toml index 3362e579a..8cb7c9cd3 100644 --- a/kernel/examples/read-table-multi-threaded/Cargo.toml +++ b/kernel/examples/read-table-multi-threaded/Cargo.toml @@ -5,10 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "sync-engine", "developer-visibility", diff --git a/kernel/examples/read-table-multi-threaded/README.md b/kernel/examples/read-table-multi-threaded/README.md index 5c4cdebfb..8cb45ecdb 100644 --- a/kernel/examples/read-table-multi-threaded/README.md +++ b/kernel/examples/read-table-multi-threaded/README.md @@ -3,7 +3,7 @@ Read Table Multi-Threaded # About This example shows a program that reads a table using multiple threads. This shows the use of the -`scan_data`, `global_scan_state`, and `visit_scan_files` methods, that can be used to partition work +`scan_metadata`, `global_scan_state`, and `visit_scan_files` methods, that can be used to partition work to either multiple threads, or workers (in the case of a distributed engine). You can run this from the same directory as this `README.md` by running `cargo run -- [args]`. @@ -49,4 +49,4 @@ To select specific columns you need a `--` after the column list specification. - Read `letter` and `data` columns from the `multi_partitioned` dat table: -`cargo run -- --columns letter,data -- ../../../acceptance/tests/dat/out/reader_tests/generated/multi_partitioned/delta/` +`cargo run -- --columns letter,data -- ../../../acceptance/tests/dat/out/reader_tests/generated/multi_partitioned/delta/` \ No newline at end of file diff --git a/kernel/examples/read-table-multi-threaded/src/main.rs b/kernel/examples/read-table-multi-threaded/src/main.rs index 9e2cee88c..e75eeeb4e 100644 --- a/kernel/examples/read-table-multi-threaded/src/main.rs +++ b/kernel/examples/read-table-multi-threaded/src/main.rs @@ -20,7 +20,7 @@ use clap::{Parser, ValueEnum}; use url::Url; /// An example program that reads a table using multiple threads. This shows the use of the -/// scan_data and global_scan_state methods on a Scan, that can be used to partition work to either +/// scan_metadata and global_scan_state methods on a Scan, that can be used to partition work to either /// multiple threads, or workers (in the case of a distributed engine). #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -179,7 +179,7 @@ fn try_main() -> DeltaResult<()> { // [`delta_kernel::scan::scan_row_schema`]. 
Generally engines will not need to interact with // this data directly, and can just call [`visit_scan_files`] to get pre-parsed data back from // the kernel. - let scan_data = scan.scan_data(engine.as_ref())?; + let scan_metadata = scan.scan_metadata(engine.as_ref())?; // get any global state associated with this scan let global_state = Arc::new(scan.global_scan_state()); @@ -209,15 +209,9 @@ fn try_main() -> DeltaResult<()> { // done sending drop(record_batch_tx); - for res in scan_data { - let (data, vector, transforms) = res?; - scan_file_tx = delta_kernel::scan::state::visit_scan_files( - data.as_ref(), - &vector, - &transforms, - scan_file_tx, - send_scan_file, - )?; + for res in scan_metadata { + let scan_metadata = res?; + scan_file_tx = scan_metadata.visit_scan_files(scan_file_tx, send_scan_file)?; } // have sent all scan files, drop this so threads will exit when there's no more work @@ -286,7 +280,7 @@ fn do_work( // enough meta-data was passed to each thread to correctly apply the selection // vector let read_results = engine - .get_parquet_handler() + .parquet_handler() .read_parquet_files(&[meta], scan_state.physical_schema.clone(), None) .unwrap(); diff --git a/kernel/examples/read-table-single-threaded/Cargo.toml b/kernel/examples/read-table-single-threaded/Cargo.toml index dc0458139..e71959e7b 100644 --- a/kernel/examples/read-table-single-threaded/Cargo.toml +++ b/kernel/examples/read-table-single-threaded/Cargo.toml @@ -5,9 +5,10 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ + "arrow_53", "cloud", "default-engine", "sync-engine", diff --git a/kernel/src/actions/deletion_vector.rs b/kernel/src/actions/deletion_vector.rs index 953b73d24..8b0c20d59 100644 --- a/kernel/src/actions/deletion_vector.rs +++ b/kernel/src/actions/deletion_vector.rs @@ -10,7 +10,7 @@ use url::Url; use delta_kernel_derive::Schema; use crate::utils::require; -use crate::{DeltaResult, Error, FileSystemClient}; +use crate::{DeltaResult, Error, StorageHandler}; #[derive(Debug, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(serde::Serialize), serde(rename_all = "camelCase"))] @@ -66,7 +66,7 @@ impl DeletionVectorDescriptor { let path_len = self.path_or_inline_dv.len(); require!( path_len >= 20, - Error::deletion_vector("Invalid length {path_len}, must be >= 20") + Error::DeletionVector(format!("Invalid length {path_len}, must be >= 20")) ); let prefix_len = path_len - 20; let decoded = z85::decode(&self.path_or_inline_dv[prefix_len..]) @@ -104,7 +104,7 @@ impl DeletionVectorDescriptor { // are present, we assert they are the same pub fn read( &self, - fs_client: Arc, + storage: Arc, parent: &Url, ) -> DeltaResult { match self.absolute_path(parent)? { @@ -125,7 +125,7 @@ impl DeletionVectorDescriptor { let offset = self.offset; let size_in_bytes = self.size_in_bytes; - let dv_data = fs_client + let dv_data = storage .read_files(vec![(path, None)])? .next() .ok_or(Error::missing_data("No deletion vector data"))??; @@ -178,10 +178,10 @@ impl DeletionVectorDescriptor { /// represents a row index that is deleted from the table. 
pub fn row_indexes( &self, - fs_client: Arc, + storage: Arc, parent: &Url, ) -> DeltaResult> { - Ok(self.read(fs_client, parent)?.into_iter().collect()) + Ok(self.read(storage, parent)?.into_iter().collect()) } } @@ -363,9 +363,9 @@ mod tests { fn test_inline_read() { let inline = dv_inline(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let parent = Url::parse("http://not.used").unwrap(); - let tree_map = inline.read(fs_client, &parent).unwrap(); + let tree_map = inline.read(storage, &parent).unwrap(); assert_eq!(tree_map.len(), 6); for i in [3, 4, 7, 11, 18, 29] { assert!(tree_map.contains(i)); @@ -381,10 +381,10 @@ mod tests { std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap(); let parent = url::Url::from_directory_path(path).unwrap(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let example = dv_example(); - let tree_map = example.read(fs_client, &parent).unwrap(); + let tree_map = example.read(storage, &parent).unwrap(); let expected: Vec = vec![0, 9]; let found = tree_map.iter().collect::>(); @@ -441,9 +441,9 @@ mod tests { fn test_dv_row_indexes() { let example = dv_inline(); let sync_engine = SyncEngine::new(); - let fs_client = sync_engine.get_file_system_client(); + let storage = sync_engine.storage_handler(); let parent = Url::parse("http://not.used").unwrap(); - let row_idx = example.row_indexes(fs_client, &parent).unwrap(); + let row_idx = example.row_indexes(storage, &parent).unwrap(); assert_eq!(row_idx.len(), 6); assert_eq!(&row_idx, &[3, 4, 7, 11, 18, 29]); diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 8bcb5df50..42e009ac5 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -1,8 +1,7 @@ //! Provides parsing and manipulation of the various actions defined in the [Delta //! 
specification](https://github.com/delta-io/delta/blob/master/PROTOCOL.md) -use std::any::type_name; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::fmt::{Debug, Display}; use std::hash::Hash; use std::str::FromStr; @@ -12,14 +11,16 @@ use self::deletion_vector::DeletionVectorDescriptor; use crate::actions::schemas::GetStructField; use crate::schema::{SchemaRef, StructType}; use crate::table_features::{ - ReaderFeatures, WriterFeatures, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, + ReaderFeature, WriterFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, }; use crate::table_properties::TableProperties; use crate::utils::require; -use crate::{DeltaResult, EngineData, Error, RowVisitor as _}; +use crate::{DeltaResult, EngineData, Error, FileMeta, RowVisitor as _}; +use url::Url; use visitors::{MetadataVisitor, ProtocolVisitor}; use delta_kernel_derive::Schema; +use itertools::Itertools; use serde::{Deserialize, Serialize}; pub mod deletion_vector; @@ -47,6 +48,8 @@ pub(crate) const COMMIT_INFO_NAME: &str = "commitInfo"; pub(crate) const CDC_NAME: &str = "cdc"; #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) const SIDECAR_NAME: &str = "sidecar"; +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) const CHECKPOINT_METADATA_NAME: &str = "checkpointMetadata"; static LOG_ADD_SCHEMA: LazyLock = LazyLock::new(|| StructType::new([Option::::get_struct_field(ADD_NAME)]).into()); @@ -61,6 +64,7 @@ static LOG_SCHEMA: LazyLock = LazyLock::new(|| { Option::::get_struct_field(COMMIT_INFO_NAME), Option::::get_struct_field(CDC_NAME), Option::::get_struct_field(SIDECAR_NAME), + Option::::get_struct_field(CHECKPOINT_METADATA_NAME), // We don't support the following actions yet //Option::::get_struct_field(DOMAIN_METADATA_NAME), ]) @@ -88,12 +92,13 @@ pub(crate) fn get_log_commit_info_schema() -> &'static SchemaRef { } #[derive(Debug, Clone, PartialEq, Eq, Schema)] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(test, derive(Serialize), serde(rename_all = "camelCase"))] -pub struct Format { +pub(crate) struct Format { /// Name of the encoding for files in this table - pub provider: String, + pub(crate) provider: String, /// A map containing configuration options for the format - pub options: HashMap, + pub(crate) options: HashMap, } impl Default for Format { @@ -107,49 +112,63 @@ impl Default for Format { #[derive(Debug, Default, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(Serialize), serde(rename_all = "camelCase"))] -pub struct Metadata { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct Metadata { /// Unique identifier for this table - pub id: String, + pub(crate) id: String, /// User-provided identifier for this table - pub name: Option, + pub(crate) name: Option, /// User-provided description for this table - pub description: Option, + pub(crate) description: Option, /// Specification of the encoding for the files stored in the table - pub format: Format, + pub(crate) format: Format, /// Schema of the table - pub schema_string: String, + pub(crate) schema_string: String, /// Column names by which the data should be partitioned - pub partition_columns: Vec, + pub(crate) partition_columns: Vec, /// The time when this metadata action is created, in milliseconds since the Unix epoch - pub created_time: Option, + pub(crate) created_time: Option, /// Configuration options for the metadata action. 
These are parsed into [`TableProperties`]. - pub configuration: HashMap, + pub(crate) configuration: HashMap, } impl Metadata { - pub fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { + pub(crate) fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { let mut visitor = MetadataVisitor::default(); visitor.visit_rows_of(data)?; Ok(visitor.metadata) } - pub fn parse_schema(&self) -> DeltaResult { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn configuration(&self) -> &HashMap { + &self.configuration + } + + pub(crate) fn parse_schema(&self) -> DeltaResult { Ok(serde_json::from_str(&self.schema_string)?) } + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn partition_columns(&self) -> &Vec { + &self.partition_columns + } + /// Parse the metadata configuration HashMap into a TableProperties struct. /// Note that parsing is infallible -- any items that fail to parse are simply propagated /// through to the `TableProperties.unknown_properties` field. - pub fn parse_table_properties(&self) -> TableProperties { + pub(crate) fn parse_table_properties(&self) -> TableProperties { TableProperties::from(self.configuration.iter()) } } #[derive(Default, Debug, Clone, PartialEq, Eq, Schema, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] // TODO move to another module so that we disallow constructing this struct without using the // try_new function. -pub struct Protocol { +pub(crate) struct Protocol { /// The minimum version of the Delta read protocol that a client must implement /// in order to correctly read this table min_reader_version: i32, @@ -159,21 +178,36 @@ pub struct Protocol { /// A collection of features that a client must implement in order to correctly /// read this table (exist only when minReaderVersion is set to 3) #[serde(skip_serializing_if = "Option::is_none")] - reader_features: Option>, + reader_features: Option>, /// A collection of features that a client must implement in order to correctly /// write this table (exist only when minWriterVersion is set to 7) #[serde(skip_serializing_if = "Option::is_none")] - writer_features: Option>, + writer_features: Option>, +} + +fn parse_features(features: Option>) -> Option> +where + T: FromStr, + T::Err: Debug, +{ + features + .map(|fs| { + fs.into_iter() + .map(|f| T::from_str(&f.to_string())) + .collect() + }) + .transpose() + .expect("Parsing FromStr should never fail with strum 'default'") } impl Protocol { /// Try to create a new Protocol instance from reader/writer versions and table features. This /// can fail if the protocol is invalid. 
- pub fn try_new( + pub(crate) fn try_new( min_reader_version: i32, min_writer_version: i32, - reader_features: Option>>, - writer_features: Option>>, + reader_features: Option>, + writer_features: Option>, ) -> DeltaResult { if min_reader_version == 3 { require!( @@ -191,8 +225,10 @@ impl Protocol { ) ); } - let reader_features = reader_features.map(|f| f.into_iter().map(Into::into).collect()); - let writer_features = writer_features.map(|f| f.into_iter().map(Into::into).collect()); + + let reader_features = parse_features(reader_features); + let writer_features = parse_features(writer_features); + Ok(Protocol { min_reader_version, min_writer_version, @@ -203,48 +239,50 @@ impl Protocol { /// Create a new Protocol by visiting the EngineData and extracting the first protocol row into /// a Protocol instance. If no protocol row is found, returns Ok(None). - pub fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { + pub(crate) fn try_new_from_data(data: &dyn EngineData) -> DeltaResult> { let mut visitor = ProtocolVisitor::default(); visitor.visit_rows_of(data)?; Ok(visitor.protocol) } /// This protocol's minimum reader version - pub fn min_reader_version(&self) -> i32 { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn min_reader_version(&self) -> i32 { self.min_reader_version } /// This protocol's minimum writer version - pub fn min_writer_version(&self) -> i32 { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn min_writer_version(&self) -> i32 { self.min_writer_version } /// Get the reader features for the protocol - pub fn reader_features(&self) -> Option<&[String]> { + pub(crate) fn reader_features(&self) -> Option<&[ReaderFeature]> { self.reader_features.as_deref() } /// Get the writer features for the protocol - pub fn writer_features(&self) -> Option<&[String]> { + pub(crate) fn writer_features(&self) -> Option<&[WriterFeature]> { self.writer_features.as_deref() } /// True if this protocol has the requested reader feature - pub fn has_reader_feature(&self, feature: &ReaderFeatures) -> bool { + pub(crate) fn has_reader_feature(&self, feature: &ReaderFeature) -> bool { self.reader_features() - .is_some_and(|features| features.iter().any(|f| f == feature.as_ref())) + .is_some_and(|features| features.contains(feature)) } /// True if this protocol has the requested writer feature - pub fn has_writer_feature(&self, feature: &WriterFeatures) -> bool { + pub(crate) fn has_writer_feature(&self, feature: &WriterFeature) -> bool { self.writer_features() - .is_some_and(|features| features.iter().any(|f| f == feature.as_ref())) + .is_some_and(|features| features.contains(feature)) } /// Check if reading a table with this protocol is supported. That is: does the kernel support /// the specified protocol reader version and all enabled reader features? If yes, returns unit /// type, otherwise will return an error. - pub fn ensure_read_supported(&self) -> DeltaResult<()> { + pub(crate) fn ensure_read_supported(&self) -> DeltaResult<()> { match &self.reader_features { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.min_reader_version == 3 => { @@ -274,57 +312,67 @@ impl Protocol { /// Check if writing to a table with this protocol is supported. That is: does the kernel /// support the specified protocol writer version and all enabled writer features? 
- pub fn ensure_write_supported(&self) -> DeltaResult<()> { + pub(crate) fn ensure_write_supported(&self) -> DeltaResult<()> { match &self.writer_features { - // if min_reader_version = 3 and min_writer_version = 7 and all writer features are - // supported => OK - Some(writer_features) - if self.min_reader_version == 3 && self.min_writer_version == 7 => - { + Some(writer_features) if self.min_writer_version == 7 => { + // if we're on version 7, make sure we support all the specified features ensure_supported_features(writer_features, &SUPPORTED_WRITER_FEATURES) } - // otherwise not supported - _ => Err(Error::unsupported( - "Only tables with min reader version 3 and min writer version 7 with no table features are supported." - )), + Some(_) => { + // there are features, but we're not on 7, so the protocol is actually broken + Err(Error::unsupported( + "Tables with min writer version != 7 should not have table features.", + )) + } + None => { + // no features, we currently only support version 1 or 2 in this case + require!( + self.min_writer_version == 1 || self.min_writer_version == 2, + Error::unsupported( + "Currently delta-kernel-rs can only write to tables with protocol.minWriterVersion = 1, 2, or 7" + ) + ); + Ok(()) + } } } } -// given unparsed `table_features`, parse and check if they are subset of `supported_features` +// given `table_features`, check if they are subset of `supported_features` pub(crate) fn ensure_supported_features( - table_features: &[String], - supported_features: &HashSet, + table_features: &[T], + supported_features: &[T], ) -> DeltaResult<()> where + T: Display + FromStr + Hash + Eq, ::Err: Display, - T: Debug + FromStr + Hash + Eq, { - let error = |unsupported, unsupported_or_unknown| { - let supported = supported_features.iter().collect::>(); - let features_type = type_name::() - .rsplit("::") - .next() - .unwrap_or("table features"); - Error::Unsupported(format!( - "{} {} {:?}. Supported {} are {:?}", - unsupported_or_unknown, features_type, unsupported, features_type, supported - )) - }; - let parsed_features: HashSet = table_features + // first check if all features are supported, else we proceed to craft an error message + if table_features .iter() - .map(|s| T::from_str(s).map_err(|_| error(vec![s.to_string()], "Unknown"))) - .collect::>()?; - parsed_features - .is_subset(supported_features) - .then_some(()) - .ok_or_else(|| { - let unsupported = parsed_features - .difference(supported_features) - .map(|f| format!("{:?}", f)) - .collect::>(); - error(unsupported, "Unsupported") - }) + .all(|feature| supported_features.contains(feature)) + { + return Ok(()); + } + + // we get the type name (ReaderFeature/WriterFeature) for better error messages + let features_type = std::any::type_name::() + .rsplit("::") + .next() + .unwrap_or("table feature"); + + // NB: we didn't do this above to avoid allocation in the common case + let mut unsupported = table_features + .iter() + .filter(|feature| !supported_features.contains(*feature)); + + Err(Error::Unsupported(format!( + "Unknown {}s: \"{}\". 
Supported {}s: \"{}\"", + features_type, + unsupported.join("\", \""), + features_type, + supported_features.iter().join("\", \""), + ))) } #[derive(Debug, Clone, PartialEq, Eq, Schema)] @@ -357,30 +405,31 @@ pub(crate) struct CommitInfo { #[derive(Debug, Clone, PartialEq, Eq, Schema)] #[cfg_attr(test, derive(Serialize, Default), serde(rename_all = "camelCase"))] -pub struct Add { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct Add { /// A relative path to a data file from the root of the table or an absolute path to a file /// that should be added to the table. The path is a URI as specified by /// [RFC 2396 URI Generic Syntax], which needs to be decoded to get the data file path. /// /// [RFC 2396 URI Generic Syntax]: https://www.ietf.org/rfc/rfc2396.txt - pub path: String, + pub(crate) path: String, /// A map from partition column to value for this logical file. This map can contain null in the /// values meaning a partition is null. We drop those values from this map, due to the /// `drop_null_container_values` annotation. This means an engine can assume that if a partition /// is found in [`Metadata`] `partition_columns`, but not in this map, its value is null. #[drop_null_container_values] - pub partition_values: HashMap, + pub(crate) partition_values: HashMap, /// The size of this data file in bytes - pub size: i64, + pub(crate) size: i64, /// The time this logical file was created, as milliseconds since the epoch. - pub modification_time: i64, + pub(crate) modification_time: i64, /// When `false` the logical file must already be present in the table or the records /// in the added file must be contained in one or more remove actions in the same version. - pub data_change: bool, + pub(crate) data_change: bool, /// Contains [statistics] (e.g., count, min/max values for columns) about the data in this logical file encoded as a JSON string. /// @@ -412,7 +461,9 @@ pub struct Add { } impl Add { - pub fn dv_unique_id(&self) -> Option { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(dead_code)] + pub(crate) fn dv_unique_id(&self) -> Option { self.deletion_vector.as_ref().map(|dv| dv.unique_id()) } } @@ -500,27 +551,27 @@ pub(crate) struct Cdc { } #[derive(Debug, Clone, PartialEq, Eq, Schema)] -pub struct SetTransaction { +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct SetTransaction { /// A unique identifier for the application performing the transaction. - pub app_id: String, + pub(crate) app_id: String, /// An application-specific numeric identifier for this transaction. - pub version: i64, + pub(crate) version: i64, /// The time when this transaction action was created in milliseconds since the Unix epoch. - pub last_updated: Option, + pub(crate) last_updated: Option, } /// The sidecar action references a sidecar file which provides some of the checkpoint's /// file actions. This action is only allowed in checkpoints following the V2 spec. /// /// [More info]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#sidecar-file-information -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Schema, Debug, PartialEq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct Sidecar { /// A path to a sidecar file that can be either: - /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. 
+ /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. /// - An absolute path /// The path is a URI as specified by [RFC 2396 URI Generic Syntax], which needs to be decoded /// to get the file path. @@ -538,6 +589,43 @@ pub(crate) struct Sidecar { pub tags: Option>, } +impl Sidecar { + /// Convert a Sidecar record to a FileMeta. + /// + /// This helper first builds the URL by joining the provided log_root with + /// the "_sidecars/" folder and the given sidecar path. + pub(crate) fn to_filemeta(&self, log_root: &Url) -> DeltaResult { + Ok(FileMeta { + location: log_root.join("_sidecars/")?.join(&self.path)?, + last_modified: self.modification_time, + size: self.size_in_bytes.try_into().map_err(|_| { + Error::generic(format!( + "Failed to convert sidecar size {} to usize", + self.size_in_bytes + )) + })?, + }) + } +} + +/// The CheckpointMetadata action describes details about a checkpoint following the V2 specification. +/// +/// [More info]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoint-metadata +#[derive(Debug, Clone, PartialEq, Eq, Schema)] +#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] +pub(crate) struct CheckpointMetadata { + /// The version of the V2 spec checkpoint. + /// + /// Currently using `i64` for compatibility with other actions' representations. + /// Future work will address converting numeric fields to unsigned types (e.g., `u64`) where + /// semantically appropriate (e.g., for version, size, timestamps, etc.). + /// See issue #786 for tracking progress. + pub(crate) version: i64, + + /// Map containing any additional metadata about the V2 spec checkpoint. + pub(crate) tags: Option>, +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -698,6 +786,21 @@ mod tests { assert_eq!(schema, expected); } + #[test] + fn test_checkpoint_metadata_schema() { + let schema = get_log_schema() + .project(&[CHECKPOINT_METADATA_NAME]) + .expect("Couldn't get checkpointMetadata field"); + let expected = Arc::new(StructType::new([StructField::nullable( + "checkpointMetadata", + StructType::new([ + StructField::not_null("version", DataType::LONG), + tags_field(), + ]), + )])); + assert_eq!(schema, expected); + } + #[test] fn test_transaction_schema() { let schema = get_log_schema() @@ -783,21 +886,21 @@ mod tests { } #[test] - fn test_v2_checkpoint_unsupported() { + fn test_v2_checkpoint_supported() { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 4, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_err()); @@ -817,17 +920,17 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), Some(&empty_features), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 3, 7, Some(&empty_features), - Some([WriterFeatures::V2Checkpoint]), + Some([WriterFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -835,11 +938,11 @@ mod tests { let protocol = 
Protocol::try_new( 3, 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([WriterFeatures::V2Checkpoint]), + Some([ReaderFeature::V2Checkpoint]), + Some([WriterFeature::V2Checkpoint]), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol { min_reader_version: 1, @@ -860,19 +963,24 @@ mod tests { #[test] fn test_ensure_write_supported() { - let protocol = Protocol { - min_reader_version: 3, - min_writer_version: 7, - reader_features: Some(vec![]), - writer_features: Some(vec![]), - }; + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some(vec![ + WriterFeature::AppendOnly, + WriterFeature::DeletionVectors, + WriterFeature::Invariants, + ]), + ) + .unwrap(); assert!(protocol.ensure_write_supported().is_ok()); let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::RowTracking]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); @@ -880,26 +988,34 @@ mod tests { #[test] fn test_ensure_supported_features() { - let supported_features = [ - ReaderFeatures::ColumnMapping, - ReaderFeatures::DeletionVectors, - ] - .into_iter() - .collect(); - let table_features = vec![ReaderFeatures::ColumnMapping.to_string()]; + let supported_features = [ReaderFeature::ColumnMapping, ReaderFeature::DeletionVectors]; + let table_features = vec![ReaderFeature::ColumnMapping]; ensure_supported_features(&table_features, &supported_features).unwrap(); // test unknown features - let table_features = vec![ReaderFeatures::ColumnMapping.to_string(), "idk".to_string()]; + let table_features = vec![ReaderFeature::ColumnMapping, ReaderFeature::unknown("idk")]; let error = ensure_supported_features(&table_features, &supported_features).unwrap_err(); match error { Error::Unsupported(e) if e == - "Unknown ReaderFeatures [\"idk\"]. Supported ReaderFeatures are [ColumnMapping, DeletionVectors]" + "Unknown ReaderFeatures: \"idk\". Supported ReaderFeatures: \"columnMapping\", \"deletionVectors\"" => {}, - Error::Unsupported(e) if e == - "Unknown ReaderFeatures [\"idk\"]. 
Supported ReaderFeatures are [DeletionVectors, ColumnMapping]" - => {}, - _ => panic!("Expected unsupported error"), + _ => panic!("Expected unsupported error, got: {error}"), } } + + #[test] + fn test_parse_table_feature_never_fails() { + // parse a non-str + let features = Some([5]); + let expected = Some(vec![ReaderFeature::unknown("5")]); + assert_eq!(parse_features::(features), expected); + + // weird strs + let features = Some(["", "absurD_)(+13%^โš™๏ธ"]); + let expected = Some(vec![ + ReaderFeature::unknown(""), + ReaderFeature::unknown("absurD_)(+13%^โš™๏ธ"), + ]); + assert_eq!(parse_features::(features), expected); + } } diff --git a/kernel/src/actions/set_transaction.rs b/kernel/src/actions/set_transaction.rs index ea1ffa6a7..a0a0517fb 100644 --- a/kernel/src/actions/set_transaction.rs +++ b/kernel/src/actions/set_transaction.rs @@ -7,13 +7,16 @@ use crate::{ DeltaResult, Engine, EngineData, Expression as Expr, ExpressionRef, RowVisitor as _, SchemaRef, }; -pub use crate::actions::visitors::SetTransactionMap; -pub struct SetTransactionScanner { +pub(crate) use crate::actions::visitors::SetTransactionMap; + +#[allow(dead_code)] +pub(crate) struct SetTransactionScanner { snapshot: Arc, } +#[allow(dead_code)] impl SetTransactionScanner { - pub fn new(snapshot: Arc) -> Self { + pub(crate) fn new(snapshot: Arc) -> Self { SetTransactionScanner { snapshot } } @@ -59,13 +62,16 @@ impl SetTransactionScanner { Expr::column([SET_TRANSACTION_NAME, "appId"]).is_not_null(), )) }); - self.snapshot - .log_segment() - .replay(engine, schema.clone(), schema, META_PREDICATE.clone()) + self.snapshot.log_segment().read_actions( + engine, + schema.clone(), + schema, + META_PREDICATE.clone(), + ) } /// Scan the Delta Log for the latest transaction entry of an application - pub fn application_transaction( + pub(crate) fn application_transaction( &self, engine: &dyn Engine, application_id: &str, @@ -75,7 +81,10 @@ impl SetTransactionScanner { } /// Scan the Delta Log to obtain the latest transaction for all applications - pub fn application_transactions(&self, engine: &dyn Engine) -> DeltaResult { + pub(crate) fn application_transactions( + &self, + engine: &dyn Engine, + ) -> DeltaResult { self.scan_application_transactions(engine, None) } } diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 9f34bd2c5..152a91e7d 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -352,7 +352,7 @@ impl RowVisitor for CdcVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of an Add action + // Since path column is required, use it to detect presence of a Cdc action if let Some(path) = getters[0].get_opt(i, "cdc.path")? 
{ self.cdcs.push(Self::visit_cdc(i, path, getters)?); } @@ -361,7 +361,7 @@ impl RowVisitor for CdcVisitor { } } -pub type SetTransactionMap = HashMap; +pub(crate) type SetTransactionMap = HashMap; /// Extract application transaction actions from the log into a map /// @@ -438,7 +438,6 @@ impl RowVisitor for SetTransactionVisitor { } } -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Default)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct SidecarVisitor { @@ -475,7 +474,7 @@ impl RowVisitor for SidecarVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of a sidecar action + // Since path column is required, use it to detect presence of a Sidecar action if let Some(path) = getters[0].get_opt(i, "sidecar.path")? { self.sidecars.push(Self::visit_sidecar(i, path, getters)?); } @@ -512,45 +511,12 @@ pub(crate) fn visit_deletion_vector_at<'a>( #[cfg(test)] mod tests { - use std::sync::Arc; + use super::*; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::StringArray; - use super::*; - use crate::{ - actions::get_log_schema, - engine::arrow_data::ArrowEngineData, - engine::sync::{json::SyncJsonHandler, SyncEngine}, - Engine, EngineData, JsonHandler, - }; - - // TODO(nick): Merge all copies of this into one "test utils" thing - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) - } - - fn action_batch() -> Box { - let handler = SyncJsonHandler {}; - let json_strings: StringArray = vec![ - r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, - r#"{"commitInfo":{"timestamp":1677811178585,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"635"},"engineInfo":"Databricks-Runtime/","txnId":"a6a94671-55ef-450e-9546-b8465b9147de"}}"#, - r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, - r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none", "delta.enableChangeDataFeed":"true"},"createdTime":1677811175819}}"#, - r#"{"cdc":{"path":"_change_data/age=21/cdc-00000-93f7fceb-281a-446a-b221-07b88132d203.c000.snappy.parquet","partitionValues":{"age":"21"},"size":1033,"dataChange":false}}"#, - 
r#"{"sidecar":{"path":"016ae953-37a9-438e-8683-9a9a4a79a395.parquet","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{"tag_foo":"tag_bar"}}}"#, - ] - .into(); - let output_schema = get_log_schema().clone(); - let parsed = handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); - ArrowEngineData::try_from_engine_data(parsed).unwrap() - } + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::utils::test_utils::{action_batch, parse_json_batch}; #[test] fn test_parse_protocol() -> DeltaResult<()> { @@ -559,8 +525,8 @@ mod tests { let expected = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec!["deletionVectors".into()]), - writer_features: Some(vec!["deletionVectors".into()]), + reader_features: Some(vec![ReaderFeature::DeletionVectors]), + writer_features: Some(vec![WriterFeature::DeletionVectors]), }; assert_eq!(parsed, expected); Ok(()) @@ -640,8 +606,6 @@ mod tests { #[test] fn test_parse_add_partitioned() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"commitInfo":{"timestamp":1670892998177,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"c1\",\"c2\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#, r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, @@ -651,10 +615,7 @@ mod tests { r#"{"add":{"path":"c1=6/c2=a/part-00011-10619b10-b691-4fd0-acc4-2a9608499d7c.c000.snappy.parquet","partitionValues":{"c1":"6","c2":"a"},"size":452,"modificationTime":1670892998137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":4},\"maxValues\":{\"c3\":4},\"nullCount\":{\"c3\":0}}"}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut add_visitor = AddVisitor::default(); add_visitor.visit_rows_of(batch.as_ref()).unwrap(); let add1 = Add { @@ -698,18 +659,13 @@ mod tests { #[test] fn test_parse_remove_partitioned() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"#, r#"{"remove":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut remove_visitor = RemoveVisitor::default(); remove_visitor.visit_rows_of(batch.as_ref()).unwrap(); let expected_remove = Remove { @@ -737,8 +693,6 @@ 
mod tests { #[test] fn test_parse_txn() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); let json_strings: StringArray = vec![ r#"{"commitInfo":{"timestamp":1670892998177,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"c1\",\"c2\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#, r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, @@ -748,10 +702,7 @@ mod tests { r#"{"txn":{"appId":"myApp2","version": 4, "lastUpdated": 1670892998177}}"#, ] .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); + let batch = parse_json_batch(json_strings); let mut txn_visitor = SetTransactionVisitor::default(); txn_visitor.visit_rows_of(batch.as_ref()).unwrap(); let mut actual = txn_visitor.set_transactions; diff --git a/kernel/src/arrow.rs b/kernel/src/arrow.rs new file mode 100644 index 000000000..915e603af --- /dev/null +++ b/kernel/src/arrow.rs @@ -0,0 +1,17 @@ +//! This module exists to help re-export the version of arrow used by default-engine and other +//! parts of kernel that need arrow + +#[cfg(feature = "arrow_53")] +pub use arrow_53::*; + +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] +pub use arrow_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/kernel/src/checkpoint/log_replay.rs b/kernel/src/checkpoint/log_replay.rs new file mode 100644 index 000000000..7867e042e --- /dev/null +++ b/kernel/src/checkpoint/log_replay.rs @@ -0,0 +1,588 @@ +//! The [`CheckpointLogReplayProcessor`] implements specialized log replay logic for creating +//! checkpoint files. It processes log files in reverse chronological order (newest to oldest) +//! and selects the set of actions to include in a checkpoint for a specific version. +//! +//! ## Actions Included for Checkpointing +//! +//! For checkpoint creation, this processor applies several filtering and deduplication +//! steps to each batch of log actions: +//! +//! 1. **Protocol and Metadata**: Retains exactly one of each - keeping only the latest protocol +//! and metadata actions. +//! 2. **Txn Actions**: Keeps exactly one `txn` action for each unique app ID, always selecting +//! the latest one encountered. +//! 3. **File Actions**: Resolves file actions to produce the latest state of the table, keeping +//! the most recent valid add actions and unexpired remove actions (tombstones) that are newer +//! than `minimum_file_retention_timestamp`. +//! +//! ## Architecture +//! +//! - [`CheckpointVisitor`]: Implements [`RowVisitor`] to examine each action in a batch and +//! determine if it should be included in the checkpoint. It maintains state for deduplication +//! across multiple actions in a batch and efficiently handles all filtering rules. +//! +//! - [`CheckpointLogReplayProcessor`]: Implements the [`LogReplayProcessor`] trait and orchestrates +//! the overall process. For each batch of log actions, it: +//! 1. 
Creates a visitor with the current deduplication state +//! 2. Applies the visitor to filter actions in the batch +//! 3. Updates counters and state for cross-batch deduplication +//! 4. Produces a [`CheckpointData`] result which includes a selection vector indicating which +//! actions should be included in the checkpoint file +use std::collections::HashSet; +use std::sync::LazyLock; + +use crate::engine_data::{GetData, RowVisitor, TypedGetData as _}; +use crate::log_replay::{FileActionDeduplicator, FileActionKey}; +use crate::schema::{column_name, ColumnName, ColumnNamesAndTypes, DataType}; +use crate::utils::require; +use crate::{DeltaResult, Error}; + +/// A visitor that filters actions for inclusion in a V1 spec checkpoint file. +/// +/// This visitor processes actions in newest-to-oldest order (as they appear in log +/// replay) and applies deduplication logic for both file and non-file actions to +/// produce the actions to include in a checkpoint. +/// +/// # File Action Filtering Rules: +/// Kept Actions: +/// - The first (newest) add action for each unique (path, dvId) pair +/// - The first (newest) remove action for each unique (path, dvId) pair, but only if +/// its deletionTimestamp > minimumFileRetentionTimestamp +/// Omitted Actions: +/// - Any file action (add/remove) with the same (path, dvId) as a previously processed action +/// - All remove actions with deletionTimestamp โ‰ค minimumFileRetentionTimestamp +/// - All remove actions with missing deletionTimestamp (defaults to 0) +/// +/// The resulting filtered file actions represents files present in the table (add actions) and +/// unexpired tombstones required for vacuum operations (remove actions). +/// +/// # Non-File Action Filtering: +/// - Keeps only the first protocol action (newest version) +/// - Keeps only the first metadata action (most recent table metadata) +/// - Keeps only the first txn action for each unique app ID +/// +/// # Excluded Actions +/// - CommitInfo, CDC, and CheckpointMetadata actions should not appear in the action +/// batches processed by this visitor, as they are excluded by the schema used to +/// read the log files upstream. If present, they will be ignored by the visitor. +/// - Sidecar actions should also be excludedโ€”when encountered in the log, the +/// corresponding sidecar files are read to extract the referenced file actions, +/// which are then included directly in the action stream instead of the sidecar actions themselves. +/// - The CheckpointMetadata action is included down the wire when writing a V2 spec checkpoint. +/// +/// # Memory Usage +/// This struct has O(N + M) memory usage where: +/// - N = number of txn actions with unique appIds +/// - M = number of file actions with unique (path, dvId) pairs +/// +/// The resulting filtered set of actions are the actions which should be written to a +/// checkpoint for a corresponding version. +pub(crate) struct CheckpointVisitor<'seen> { + // Deduplicates file actions (applies logic to filter Adds with corresponding Removes, + // and keep unexpired Removes). This deduplicator builds a set of seen file actions. + // This set has O(M) memory usage where M = number of file actions with unique (path, dvId) pairs + deduplicator: FileActionDeduplicator<'seen>, + // Tracks which rows to include in the final output + selection_vector: Vec, + // TODO: _last_checkpoint schema should be updated to use u64 instead of i64 + // for fields that are not expected to be negative. 
(Issue #786) + // i64 to match the `_last_checkpoint` file schema + non_file_actions_count: i64, + // i64 to match the `_last_checkpoint` file schema + file_actions_count: i64, + // i64 to match the `_last_checkpoint` file schema + add_actions_count: i64, + // i64 for comparison with remove.deletionTimestamp + minimum_file_retention_timestamp: i64, + // Flag to track if we've seen a protocol action so we can keep only the first protocol action + seen_protocol: bool, + // Flag to track if we've seen a metadata action so we can keep only the first metadata action + seen_metadata: bool, + // Set of transaction IDs to deduplicate by appId + // This set has O(N) memory usage where N = number of txn actions with unique appIds + seen_txns: &'seen mut HashSet, +} + +#[allow(unused)] +impl CheckpointVisitor<'_> { + // These index positions correspond to the order of columns defined in + // `selected_column_names_and_types()` + const ADD_PATH_INDEX: usize = 0; // Position of "add.path" in getters + const ADD_DV_START_INDEX: usize = 1; // Start position of add deletion vector columns + const REMOVE_PATH_INDEX: usize = 4; // Position of "remove.path" in getters + const REMOVE_DELETION_TIMESTAMP_INDEX: usize = 5; // Position of "remove.deletionTimestamp" in getters + const REMOVE_DV_START_INDEX: usize = 6; // Start position of remove deletion vector columns + + // These are the column names used to access the data in the getters + const REMOVE_DELETION_TIMESTAMP: &'static str = "remove.deletionTimestamp"; + const PROTOCOL_MIN_READER_VERSION: &'static str = "protocol.minReaderVersion"; + const METADATA_ID: &'static str = "metaData.id"; + + pub(crate) fn new<'seen>( + seen_file_keys: &'seen mut HashSet, + is_log_batch: bool, + selection_vector: Vec, + minimum_file_retention_timestamp: i64, + seen_protocol: bool, + seen_metadata: bool, + seen_txns: &'seen mut HashSet, + ) -> CheckpointVisitor<'seen> { + CheckpointVisitor { + deduplicator: FileActionDeduplicator::new( + seen_file_keys, + is_log_batch, + Self::ADD_PATH_INDEX, + Self::REMOVE_PATH_INDEX, + Self::ADD_DV_START_INDEX, + Self::REMOVE_DV_START_INDEX, + ), + selection_vector, + file_actions_count: 0, + add_actions_count: 0, + minimum_file_retention_timestamp, + seen_protocol, + seen_metadata, + seen_txns, + non_file_actions_count: 0, + } + } + + /// Determines if a remove action tombstone has expired and should be excluded from the checkpoint. + /// + /// A remove action includes a deletion_timestamp indicating when the deletion occurred. Physical + /// files are deleted lazily after a user-defined expiration time. Remove actions are kept to allow + /// concurrent readers to read snapshots at older versions. + /// + /// Tombstone expiration rules: + /// - If deletion_timestamp <= minimum_file_retention_timestamp: Expired (exclude) + /// - If deletion_timestamp > minimum_file_retention_timestamp: Valid (include) + /// - If deletion_timestamp is missing: Defaults to 0, treated as expired (exclude) + fn is_expired_tombstone<'a>(&self, i: usize, getter: &'a dyn GetData<'a>) -> DeltaResult { + // Ideally this should never be zero, but we are following the same behavior as Delta + // Spark and the Java Kernel. + // Note: When remove.deletion_timestamp is not present (defaulting to 0), the remove action + // will be excluded from the checkpoint file as it will be treated as expired. 
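+ // For example, with minimum_file_retention_timestamp = 100: a remove action whose
+ // deletionTimestamp is 100 is treated as expired and excluded, one with deletionTimestamp
+ // 101 is kept, and one with no deletionTimestamp defaults to 0 and is excluded.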
+ let deletion_timestamp = getter.get_opt(i, "remove.deletionTimestamp")?; + let deletion_timestamp = deletion_timestamp.unwrap_or(0i64); + + Ok(deletion_timestamp <= self.minimum_file_retention_timestamp) + } + + /// Processes a potential file action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid file action to be included in the checkpoint. + /// Returns Ok(false) if the row doesn't contain a file action or should be skipped. + /// Returns Err(...) if there was an error processing the action. + /// + /// Note: This function handles both add and remove actions, applying deduplication logic and + /// tombstone expiration rules as needed. + fn check_file_action<'a>( + &mut self, + i: usize, + getters: &[&'a dyn GetData<'a>], + ) -> DeltaResult { + // Extract the file action and handle errors immediately + let (file_key, is_add) = match self.deduplicator.extract_file_action(i, getters, false)? { + Some(action) => action, + None => return Ok(false), // If no file action is found, skip this row + }; + + // Check if we've already seen this file action + if self.deduplicator.check_and_record_seen(file_key) { + return Ok(false); // Skip file actions that we've processed before + } + + // Check for valid, non-duplicate adds and non-expired removes + if is_add { + self.add_actions_count += 1; + } else if self.is_expired_tombstone(i, getters[Self::REMOVE_DELETION_TIMESTAMP_INDEX])? { + return Ok(false); // Skip expired remove tombstones + } + self.file_actions_count += 1; + Ok(true) // Include this action + } + + /// Processes a potential protocol action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid protocol action. + /// Returns Ok(false) if the row doesn't contain a protocol action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_protocol_action<'a>( + &mut self, + i: usize, + getter: &'a dyn GetData<'a>, + ) -> DeltaResult { + // Skip protocol actions if we've already seen a newer one + if self.seen_protocol { + return Ok(false); + } + + // minReaderVersion is a required field, so we check for its presence to determine if this is a protocol action. + if getter + .get_int(i, Self::PROTOCOL_MIN_READER_VERSION)? + .is_none() + { + return Ok(false); // Not a protocol action + } + // Valid, non-duplicate protocol action to be included + self.seen_protocol = true; + self.non_file_actions_count += 1; + Ok(true) + } + + /// Processes a potential metadata action to determine if it should be included in the checkpoint. + /// + /// Returns Ok(true) if the row contains a valid metadata action. + /// Returns Ok(false) if the row doesn't contain a metadata action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_metadata_action<'a>( + &mut self, + i: usize, + getter: &'a dyn GetData<'a>, + ) -> DeltaResult { + // Skip metadata actions if we've already seen a newer one + if self.seen_metadata { + return Ok(false); + } + + // id is a required field, so we check for its presence to determine if this is a metadata action. + if getter.get_str(i, Self::METADATA_ID)?.is_none() { + return Ok(false); // Not a metadata action + } + + // Valid, non-duplicate metadata action to be included + self.seen_metadata = true; + self.non_file_actions_count += 1; + Ok(true) + } + + /// Processes a potential txn action to determine if it should be included in the checkpoint. 
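+ /// Txn actions are deduplicated by app ID: only the first (newest) txn action seen for a
+ /// given `appId` is kept, and any later txn action for the same `appId` is skipped.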
+ /// + /// Returns Ok(true) if the row contains a valid txn action. + /// Returns Ok(false) if the row doesn't contain a txn action or is a duplicate. + /// Returns Err(...) if there was an error processing the action. + fn check_txn_action<'a>(&mut self, i: usize, getter: &'a dyn GetData<'a>) -> DeltaResult { + // Check for txn field + let Some(app_id) = getter.get_str(i, "txn.appId")? else { + return Ok(false); // Not a txn action + }; + + // If the app ID already exists in the set, the insertion will return false, + // indicating that this is a duplicate. + if !self.seen_txns.insert(app_id.to_string()) { + return Ok(false); + } + + // Valid, non-duplicate txn action to be included + self.non_file_actions_count += 1; + Ok(true) + } + + /// Determines if a row in the batch should be included in the checkpoint. + /// + /// This method checks each action type in sequence, short-circuiting as soon as a valid action is found. + /// Actions are checked in order of expected frequency of occurrence to optimize performance: + /// 1. File actions (most frequent) + /// 2. Txn actions + /// 3. Protocol & Metadata actions (least frequent) + /// + /// Returns Ok(true) if the row should be included in the checkpoint. + /// Returns Ok(false) if the row should be skipped. + /// Returns Err(...) if any validation or extraction failed. + pub(crate) fn is_valid_action<'a>( + &mut self, + i: usize, + getters: &[&'a dyn GetData<'a>], + ) -> DeltaResult { + // The `||` operator short-circuits the evaluation, so if any of the checks return true, + // the rest will not be evaluated. + Ok(self.check_file_action(i, getters)? + || self.check_txn_action(i, getters[11])? + || self.check_protocol_action(i, getters[10])? + || self.check_metadata_action(i, getters[9])?) + } +} + +impl RowVisitor for CheckpointVisitor<'_> { + fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { + // The data columns visited must be in the following order: + // 1. ADD + // 2. REMOVE + // 3. METADATA + // 4. PROTOCOL + // 5. 
TXN + static NAMES_AND_TYPES: LazyLock = LazyLock::new(|| { + const STRING: DataType = DataType::STRING; + const INTEGER: DataType = DataType::INTEGER; + const LONG: DataType = DataType::LONG; + let types_and_names = vec![ + // File action columns + (STRING, column_name!("add.path")), + (STRING, column_name!("add.deletionVector.storageType")), + (STRING, column_name!("add.deletionVector.pathOrInlineDv")), + (INTEGER, column_name!("add.deletionVector.offset")), + (STRING, column_name!("remove.path")), + (LONG, column_name!("remove.deletionTimestamp")), + (STRING, column_name!("remove.deletionVector.storageType")), + (STRING, column_name!("remove.deletionVector.pathOrInlineDv")), + (INTEGER, column_name!("remove.deletionVector.offset")), + // Non-file action columns + (STRING, column_name!("metaData.id")), + (INTEGER, column_name!("protocol.minReaderVersion")), + (STRING, column_name!("txn.appId")), + ]; + let (types, names) = types_and_names.into_iter().unzip(); + (names, types).into() + }); + NAMES_AND_TYPES.as_ref() + } + + fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()> { + require!( + getters.len() == 12, + Error::InternalError(format!( + "Wrong number of visitor getters: {}", + getters.len() + )) + ); + + for i in 0..row_count { + self.selection_vector[i] = self.is_valid_action(i, getters)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use crate::arrow::array::StringArray; + use crate::utils::test_utils::{action_batch, parse_json_batch}; + + use super::*; + + #[test] + fn test_checkpoint_visitor() -> DeltaResult<()> { + let data = action_batch(); + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 8], + 0, // minimum_file_retention_timestamp (no expired tombstones) + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(data.as_ref())?; + + let expected = vec![ + true, // Row 0 is an add action (included) + true, // Row 1 is a remove action (included) + false, // Row 2 is a commit info action (excluded) + true, // Row 3 is a protocol action (included) + true, // Row 4 is a metadata action (included) + false, // Row 5 is a cdc action (excluded) + false, // Row 6 is a sidecar action (excluded) + true, // Row 7 is a txn action (included) + ]; + + assert_eq!(visitor.file_actions_count, 2); + assert_eq!(visitor.add_actions_count, 1); + assert!(visitor.seen_protocol); + assert!(visitor.seen_metadata); + assert_eq!(visitor.seen_txns.len(), 1); + assert_eq!(visitor.non_file_actions_count, 3); + + assert_eq!(visitor.selection_vector, expected); + Ok(()) + } + + /// Tests the boundary conditions for tombstone expiration logic. 
+ /// Specifically checks: + /// - Remove actions with deletionTimestamp == minimumFileRetentionTimestamp (should be excluded) + /// - Remove actions with deletionTimestamp < minimumFileRetentionTimestamp (should be excluded) + /// - Remove actions with deletionTimestamp > minimumFileRetentionTimestamp (should be included) + /// - Remove actions with missing deletionTimestamp (defaults to 0, should be excluded) + #[test] + fn test_checkpoint_visitor_boundary_cases_for_tombstone_expiration() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"remove":{"path":"exactly_at_threshold","deletionTimestamp":100,"dataChange":true,"partitionValues":{}}}"#, + r#"{"remove":{"path":"one_below_threshold","deletionTimestamp":99,"dataChange":true,"partitionValues":{}}}"#, + r#"{"remove":{"path":"one_above_threshold","deletionTimestamp":101,"dataChange":true,"partitionValues":{}}}"#, + // Missing timestamp defaults to 0 + r#"{"remove":{"path":"missing_timestamp","dataChange":true,"partitionValues":{}}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 4], + 100, // minimum_file_retention_timestamp (threshold set to 100) + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // Only "one_above_threshold" should be kept + let expected = vec![false, false, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 1); + assert_eq!(visitor.add_actions_count, 0); + assert_eq!(visitor.non_file_actions_count, 0); + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_file_actions_in_checkpoint_batch() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"add":{"path":"file1","partitionValues":{"c1":"6","c2":"a"},"size":452,"modificationTime":1670892998137,"dataChange":true}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + false, // is_log_batch = false (checkpoint batch) + vec![true; 1], + 0, + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + let expected = vec![true]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 1); + assert_eq!(visitor.add_actions_count, 1); + assert_eq!(visitor.non_file_actions_count, 0); + // The action should NOT be added to the seen_file_keys set as it's a checkpoint batch + // and actions in checkpoint batches do not conflict with each other. + // This is a key difference from log batches, where actions can conflict. 
+ assert!(seen_file_keys.is_empty()); + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_conflicts_with_deletion_vectors() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + // Add action for file1 with deletion vector + r#"{"add":{"path":"file1","partitionValues":{},"size":635,"modificationTime":100,"dataChange":true,"deletionVector":{"storageType":"two","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + // Remove action for file1 with a different deletion vector + r#"{"remove":{"path":"file1","deletionTimestamp":100,"dataChange":true,"deletionVector":{"storageType":"one","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + // Add action for file1 with the same deletion vector as the remove action above (excluded) + r#"{"add":{"path":"file1","partitionValues":{},"size":635,"modificationTime":100,"dataChange":true,"deletionVector":{"storageType":"one","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 3], + 0, + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + let expected = vec![true, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.file_actions_count, 2); + assert_eq!(visitor.add_actions_count, 1); + assert_eq!(visitor.non_file_actions_count, 0); + + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_already_seen_non_file_actions() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + ].into(); + let batch = parse_json_batch(json_strings); + + // Pre-populate with txn app1 + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + seen_txns.insert("app1".to_string()); + + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, + vec![true; 3], + 0, + true, // The visior has already seen a protocol action + true, // The visitor has already seen a metadata action + &mut seen_txns, // Pre-populated transaction + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // All actions should be skipped as they have already been seen + let expected = vec![false, false, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.non_file_actions_count, 0); + assert_eq!(visitor.file_actions_count, 0); + + Ok(()) + } + + #[test] + fn test_checkpoint_visitor_duplicate_non_file_actions() -> DeltaResult<()> { + let json_strings: StringArray = vec![ + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, + r#"{"txn":{"appId":"app1","version":1,"lastUpdated":123456789}}"#, // Duplicate txn + r#"{"txn":{"appId":"app2","version":1,"lastUpdated":123456789}}"#, // Different app ID + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7}}"#, // Duplicate 
protocol + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + // Duplicate metadata + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1677811175819}}"#, + ] + .into(); + let batch = parse_json_batch(json_strings); + + let mut seen_file_keys = HashSet::new(); + let mut seen_txns = HashSet::new(); + let mut visitor = CheckpointVisitor::new( + &mut seen_file_keys, + true, // is_log_batch + vec![true; 7], + 0, // minimum_file_retention_timestamp + false, + false, + &mut seen_txns, + ); + + visitor.visit_rows_of(batch.as_ref())?; + + // First occurrence of each type should be included + let expected = vec![true, false, true, true, false, true, false]; + assert_eq!(visitor.selection_vector, expected); + assert_eq!(visitor.seen_txns.len(), 2); // Two different app IDs + assert_eq!(visitor.non_file_actions_count, 4); // 2 txns + 1 protocol + 1 metadata + assert_eq!(visitor.file_actions_count, 0); + + Ok(()) + } +} diff --git a/kernel/src/checkpoint/mod.rs b/kernel/src/checkpoint/mod.rs new file mode 100644 index 000000000..e18479696 --- /dev/null +++ b/kernel/src/checkpoint/mod.rs @@ -0,0 +1,8 @@ +//! # Delta Kernel Checkpoint API +//! +//! This module implements the API for writing checkpoints in delta tables. +//! Checkpoints provide a compact summary of the table state, enabling faster recovery by +//! avoiding full log replay. This API supports three checkpoint types: +//! +//! TODO!(seb): Include docs when implemented +mod log_replay; diff --git a/kernel/src/engine/arrow_conversion.rs b/kernel/src/engine/arrow_conversion.rs index 0b905ff3a..a425cd143 100644 --- a/kernel/src/engine/arrow_conversion.rs +++ b/kernel/src/engine/arrow_conversion.rs @@ -2,10 +2,11 @@ use std::sync::Arc; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, }; +use crate::arrow::error::ArrowError; use itertools::Itertools; use crate::error::Error; diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index 50a627e5c..de2dbfbec 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -2,17 +2,23 @@ use crate::engine_data::{EngineData, EngineList, EngineMap, GetData, RowVisitor} use crate::schema::{ColumnName, DataType}; use crate::{DeltaResult, Error}; -use arrow_array::cast::AsArray; -use arrow_array::types::{Int32Type, Int64Type}; -use arrow_array::{ +use crate::arrow::array::cast::AsArray; +use crate::arrow::array::types::{Int32Type, Int64Type}; +use crate::arrow::array::{ Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch, StructArray, }; -use arrow_schema::{DataType as ArrowDataType, FieldRef}; +use crate::arrow::datatypes::{DataType as ArrowDataType, FieldRef}; use tracing::debug; use std::collections::{HashMap, HashSet}; -/// ArrowEngineData holds an Arrow RecordBatch, implements `EngineData` so the kernel can extract from it. 
+pub use crate::engine::arrow_utils::fix_nested_null_masks; + +/// ArrowEngineData holds an Arrow `RecordBatch`, implements `EngineData` so the kernel can extract from it. +/// +/// WARNING: Row visitors require that all leaf columns of the record batch have correctly computed +/// NULL masks. The arrow parquet reader is known to produce incomplete NULL masks, for +/// example. When in doubt, call [`fix_nested_null_masks`] first. pub struct ArrowEngineData { data: RecordBatch, } @@ -43,6 +49,12 @@ impl From for ArrowEngineData { } } +impl From for ArrowEngineData { + fn from(value: StructArray) -> Self { + ArrowEngineData::new(value.into()) + } +} + impl From for RecordBatch { fn from(value: ArrowEngineData) -> Self { value.data @@ -282,31 +294,20 @@ impl ArrowEngineData { #[cfg(test)] mod tests { - use std::sync::Arc; - - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::StringArray; + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::utils::test_utils::string_array_to_engine_data; use crate::{ actions::{get_log_schema, Metadata, Protocol}, engine::sync::SyncEngine, - DeltaResult, Engine, EngineData, + DeltaResult, Engine, }; - use super::ArrowEngineData; - - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) - } - #[test] fn test_md_extract() -> DeltaResult<()> { let engine = SyncEngine::new(); - let handler = engine.get_json_handler(); + let handler = engine.json_handler(); let json_strings: StringArray = vec![ r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"#, ] @@ -325,7 +326,7 @@ mod tests { #[test] fn test_protocol_extract() -> DeltaResult<()> { let engine = SyncEngine::new(); - let handler = engine.get_json_handler(); + let handler = engine.json_handler(); let json_strings: StringArray = vec![ r#"{"protocol": {"minReaderVersion": 3, "minWriterVersion": 7, "readerFeatures": ["rw1"], "writerFeatures": ["rw1", "w2"]}}"#, ] @@ -337,10 +338,13 @@ mod tests { let protocol = Protocol::try_new_from_data(parsed.as_ref())?.unwrap(); assert_eq!(protocol.min_reader_version(), 3); assert_eq!(protocol.min_writer_version(), 7); - assert_eq!(protocol.reader_features(), Some(["rw1".into()].as_slice())); + assert_eq!( + protocol.reader_features(), + Some([ReaderFeature::unknown("rw1")].as_slice()) + ); assert_eq!( protocol.writer_features(), - Some(["rw1".into(), "w2".into()].as_slice()) + Some([WriterFeature::unknown("rw1"), WriterFeature::unknown("w2")].as_slice()) ); Ok(()) } diff --git a/kernel/src/engine/arrow_expression.rs b/kernel/src/engine/arrow_expression.rs deleted file mode 100644 index 8ee54ebd0..000000000 --- a/kernel/src/engine/arrow_expression.rs +++ /dev/null @@ -1,870 +0,0 @@ -//! Expression handling based on arrow-rs compute kernels. 
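
To make the `ArrowEngineData` changes above concrete, here is a small kernel-internal sketch (illustrative only, not part of the patch; `wrap_batch` and `wrap_struct` are made-up names) showing a `RecordBatch` being wrapped for the kernel and the new `From<StructArray>` conversion being used:

```rust
use std::sync::Arc;

use crate::arrow::array::{Int32Array, RecordBatch, StructArray};
use crate::arrow::datatypes::{DataType, Field, Schema};
use crate::engine::arrow_data::ArrowEngineData;
use crate::EngineData;

fn wrap_batch() -> Box<dyn EngineData> {
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, true)]));
    let ids = Int32Array::from(vec![Some(1), None, Some(3)]);
    let batch = RecordBatch::try_new(schema, vec![Arc::new(ids)]).expect("valid batch");
    // If this batch came from the arrow parquet reader, nested NULL masks may be
    // incomplete; per the warning above, run `fix_nested_null_masks` on it first.
    Box::new(ArrowEngineData::new(batch))
}

fn wrap_struct(sa: StructArray) -> ArrowEngineData {
    // Uses the `From<StructArray>` impl added above:
    // StructArray -> RecordBatch -> ArrowEngineData.
    sa.into()
}
```
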
-use std::borrow::Borrow; -use std::collections::HashMap; -use std::sync::Arc; - -use arrow_arith::boolean::{and_kleene, is_null, not, or_kleene}; -use arrow_arith::numeric::{add, div, mul, sub}; -use arrow_array::cast::AsArray; -use arrow_array::{types::*, MapArray}; -use arrow_array::{ - Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Datum, Decimal128Array, Float32Array, - Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, RecordBatch, - StringArray, StructArray, TimestampMicrosecondArray, -}; -use arrow_buffer::OffsetBuffer; -use arrow_ord::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow_ord::comparison::in_list_utf8; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Fields, IntervalUnit, - Schema as ArrowSchema, TimeUnit, -}; -use arrow_select::concat::concat; -use itertools::Itertools; - -use super::arrow_conversion::LIST_ARRAY_ROOT; -use super::arrow_utils::make_arrow_error; -use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::prim_array_cmp; -use crate::engine::ensure_data_types::ensure_data_types; -use crate::error::{DeltaResult, Error}; -use crate::expressions::{ - BinaryExpression, BinaryOperator, Expression, Scalar, UnaryExpression, UnaryOperator, - VariadicExpression, VariadicOperator, -}; -use crate::schema::{ArrayType, DataType, MapType, PrimitiveType, Schema, SchemaRef, StructField}; -use crate::{EngineData, ExpressionEvaluator, ExpressionHandler}; - -// TODO leverage scalars / Datum - -fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { - arr.as_any() - .downcast_ref::() - .ok_or_else(|| Error::generic("expected boolean array")) -} - -impl Scalar { - /// Convert scalar to arrow array. - pub fn to_array(&self, num_rows: usize) -> DeltaResult { - use Scalar::*; - let arr: ArrayRef = match self { - Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), - Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), - Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), - Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), - Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), - Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), - String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), - Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), - Timestamp(val) => { - Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows).with_timezone("UTC")) - } - TimestampNtz(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), - Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), - Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), - Decimal(val, precision, scale) => Arc::new( - Decimal128Array::from_value(*val, num_rows) - .with_precision_and_scale(*precision, *scale as i8)?, - ), - Struct(data) => { - let arrays = data - .values() - .iter() - .map(|val| val.to_array(num_rows)) - .try_collect()?; - let fields: Fields = data - .fields() - .iter() - .map(ArrowField::try_from) - .try_collect()?; - Arc::new(StructArray::try_new(fields, arrays, None)?) 
- } - Array(data) => { - #[allow(deprecated)] - let values = data.array_elements(); - let vecs: Vec<_> = values.iter().map(|v| v.to_array(num_rows)).try_collect()?; - let values: Vec<_> = vecs.iter().map(|x| x.as_ref()).collect(); - let offsets: Vec<_> = vecs.iter().map(|v| v.len()).collect(); - let offset_buffer = OffsetBuffer::from_lengths(offsets); - let field = ArrowField::try_from(data.array_type())?; - Arc::new(ListArray::new( - Arc::new(field), - offset_buffer, - concat(values.as_slice())?, - None, - )) - } - Null(data_type) => match data_type { - DataType::Primitive(primitive) => match primitive { - PrimitiveType::Byte => Arc::new(Int8Array::new_null(num_rows)), - PrimitiveType::Short => Arc::new(Int16Array::new_null(num_rows)), - PrimitiveType::Integer => Arc::new(Int32Array::new_null(num_rows)), - PrimitiveType::Long => Arc::new(Int64Array::new_null(num_rows)), - PrimitiveType::Float => Arc::new(Float32Array::new_null(num_rows)), - PrimitiveType::Double => Arc::new(Float64Array::new_null(num_rows)), - PrimitiveType::String => Arc::new(StringArray::new_null(num_rows)), - PrimitiveType::Boolean => Arc::new(BooleanArray::new_null(num_rows)), - PrimitiveType::Timestamp => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows).with_timezone("UTC")) - } - PrimitiveType::TimestampNtz => { - Arc::new(TimestampMicrosecondArray::new_null(num_rows)) - } - PrimitiveType::Date => Arc::new(Date32Array::new_null(num_rows)), - PrimitiveType::Binary => Arc::new(BinaryArray::new_null(num_rows)), - PrimitiveType::Decimal(precision, scale) => Arc::new( - Decimal128Array::new_null(num_rows) - .with_precision_and_scale(*precision, *scale as i8)?, - ), - }, - DataType::Struct(t) => { - let fields: Fields = t.fields().map(ArrowField::try_from).try_collect()?; - Arc::new(StructArray::new_null(fields, num_rows)) - } - DataType::Array(t) => { - let field = - ArrowField::new(LIST_ARRAY_ROOT, t.element_type().try_into()?, true); - Arc::new(ListArray::new_null(Arc::new(field), num_rows)) - } - DataType::Map { .. } => unimplemented!(), - }, - }; - Ok(arr) - } -} - -fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { - Arc::new(arr) as _ -} - -trait ProvidesColumnByName { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef>; -} - -impl ProvidesColumnByName for RecordBatch { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { - self.column_by_name(name) - } -} - -impl ProvidesColumnByName for StructArray { - fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { - self.column_by_name(name) - } -} - -// Given a RecordBatch or StructArray, recursively probe for a nested column path and return the -// corresponding column, or Err if the path is invalid. For example, given the following schema: -// ```text -// root: { -// a: int32, -// b: struct { -// c: int32, -// d: struct { -// e: int32, -// f: int64, -// }, -// }, -// } -// ``` -// The path ["b", "d", "f"] would retrieve the int64 column while ["a", "b"] would produce an error. 
-fn extract_column(mut parent: &dyn ProvidesColumnByName, col: &[String]) -> DeltaResult { - let mut field_names = col.iter(); - let Some(mut field_name) = field_names.next() else { - return Err(ArrowError::SchemaError("Empty column path".to_string()))?; - }; - loop { - let child = parent - .column_by_name(field_name) - .ok_or_else(|| ArrowError::SchemaError(format!("No such field: {field_name}")))?; - field_name = match field_names.next() { - Some(name) => name, - None => return Ok(child.clone()), - }; - parent = child - .as_any() - .downcast_ref::() - .ok_or_else(|| ArrowError::SchemaError(format!("Not a struct: {field_name}")))?; - } -} - -fn evaluate_expression( - expression: &Expression, - batch: &RecordBatch, - result_type: Option<&DataType>, -) -> DeltaResult { - use BinaryOperator::*; - use Expression::*; - match (expression, result_type) { - (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), - (Column(name), _) => extract_column(batch, name), - (Struct(fields), Some(DataType::Struct(output_schema))) => { - let columns = fields - .iter() - .zip(output_schema.fields()) - .map(|(expr, field)| evaluate_expression(expr, batch, Some(field.data_type()))); - let output_cols: Vec = columns.try_collect()?; - let output_fields: Vec = output_cols - .iter() - .zip(output_schema.fields()) - .map(|(output_col, output_field)| -> DeltaResult<_> { - Ok(ArrowField::new( - output_field.name(), - output_col.data_type().clone(), - output_col.is_nullable(), - )) - }) - .try_collect()?; - let result = StructArray::try_new(output_fields.into(), output_cols, None)?; - Ok(Arc::new(result)) - } - (Struct(_), _) => Err(Error::generic( - "Data type is required to evaluate struct expressions", - )), - (Unary(UnaryExpression { op, expr }), _) => { - let arr = evaluate_expression(expr.as_ref(), batch, None)?; - Ok(match op { - UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), - UnaryOperator::IsNull => Arc::new(is_null(&arr)?), - }) - } - ( - Binary(BinaryExpression { - op: In, - left, - right, - }), - _, - ) => match (left.as_ref(), right.as_ref()) { - (Literal(_), Column(_)) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - if let Some(string_arr) = left_arr.as_string_opt::() { - if let Some(right_arr) = right_arr.as_list_opt::() { - return in_list_utf8(string_arr, right_arr) - .map(wrap_comparison_result) - .map_err(Error::generic_err); - } - } - prim_array_cmp! 
{ - left_arr, right_arr, - (ArrowDataType::Int8, Int8Type), - (ArrowDataType::Int16, Int16Type), - (ArrowDataType::Int32, Int32Type), - (ArrowDataType::Int64, Int64Type), - (ArrowDataType::UInt8, UInt8Type), - (ArrowDataType::UInt16, UInt16Type), - (ArrowDataType::UInt32, UInt32Type), - (ArrowDataType::UInt64, UInt64Type), - (ArrowDataType::Float16, Float16Type), - (ArrowDataType::Float32, Float32Type), - (ArrowDataType::Float64, Float64Type), - (ArrowDataType::Timestamp(TimeUnit::Second, _), TimestampSecondType), - (ArrowDataType::Timestamp(TimeUnit::Millisecond, _), TimestampMillisecondType), - (ArrowDataType::Timestamp(TimeUnit::Microsecond, _), TimestampMicrosecondType), - (ArrowDataType::Timestamp(TimeUnit::Nanosecond, _), TimestampNanosecondType), - (ArrowDataType::Date32, Date32Type), - (ArrowDataType::Date64, Date64Type), - (ArrowDataType::Time32(TimeUnit::Second), Time32SecondType), - (ArrowDataType::Time32(TimeUnit::Millisecond), Time32MillisecondType), - (ArrowDataType::Time64(TimeUnit::Microsecond), Time64MicrosecondType), - (ArrowDataType::Time64(TimeUnit::Nanosecond), Time64NanosecondType), - (ArrowDataType::Duration(TimeUnit::Second), DurationSecondType), - (ArrowDataType::Duration(TimeUnit::Millisecond), DurationMillisecondType), - (ArrowDataType::Duration(TimeUnit::Microsecond), DurationMicrosecondType), - (ArrowDataType::Duration(TimeUnit::Nanosecond), DurationNanosecondType), - (ArrowDataType::Interval(IntervalUnit::DayTime), IntervalDayTimeType), - (ArrowDataType::Interval(IntervalUnit::YearMonth), IntervalYearMonthType), - (ArrowDataType::Interval(IntervalUnit::MonthDayNano), IntervalMonthDayNanoType), - (ArrowDataType::Decimal128(_, _), Decimal128Type), - (ArrowDataType::Decimal256(_, _), Decimal256Type) - } - } - (Literal(lit), Literal(Scalar::Array(ad))) => { - #[allow(deprecated)] - let exists = ad.array_elements().contains(lit); - Ok(Arc::new(BooleanArray::from(vec![exists]))) - } - (l, r) => Err(Error::invalid_expression(format!( - "Invalid right value for (NOT) IN comparison, left is: {l} right is: {r}" - ))), - }, - ( - Binary(BinaryExpression { - op: NotIn, - left, - right, - }), - _, - ) => { - let reverse_op = Expression::binary(In, *left.clone(), *right.clone()); - let reverse_expr = evaluate_expression(&reverse_op, batch, None)?; - not(reverse_expr.as_boolean()) - .map(wrap_comparison_result) - .map_err(Error::generic_err) - } - (Binary(BinaryExpression { op, left, right }), _) => { - let left_arr = evaluate_expression(left.as_ref(), batch, None)?; - let right_arr = evaluate_expression(right.as_ref(), batch, None)?; - - type Operation = fn(&dyn Datum, &dyn Datum) -> Result; - let eval: Operation = match op { - Plus => add, - Minus => sub, - Multiply => mul, - Divide => div, - LessThan => |l, r| lt(l, r).map(wrap_comparison_result), - LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), - GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), - GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), - Equal => |l, r| eq(l, r).map(wrap_comparison_result), - NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), - Distinct => |l, r| distinct(l, r).map(wrap_comparison_result), - // NOTE: [Not]In was already covered above - In | NotIn => return Err(Error::generic("Invalid expression given")), - }; - - eval(&left_arr, &right_arr).map_err(Error::generic_err) - } - (Variadic(VariadicExpression { op, exprs }), None | Some(&DataType::BOOLEAN)) => { - type Operation = fn(&BooleanArray, &BooleanArray) -> Result; - let (reducer, 
default): (Operation, _) = match op { - VariadicOperator::And => (and_kleene, true), - VariadicOperator::Or => (or_kleene, false), - }; - exprs - .iter() - .map(|expr| evaluate_expression(expr, batch, result_type)) - .reduce(|l, r| { - Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) - .map(wrap_comparison_result)?) - }) - .unwrap_or_else(|| { - evaluate_expression(&Expression::literal(default), batch, result_type) - }) - } - (Variadic(_), _) => { - // NOTE: Update this error message if we add support for variadic operations on other types - Err(Error::Generic(format!( - "Variadic {expression:?} is expected to return boolean results, got {result_type:?}" - ))) - } - } -} - -// Apply a schema to an array. The array _must_ be a `StructArray`. Returns a `RecordBatch where the -// names of fields, nullable, and metadata in the struct have been transformed to match those in -// schema specified by `schema` -fn apply_schema(array: &dyn Array, schema: &DataType) -> DeltaResult { - let DataType::Struct(struct_schema) = schema else { - return Err(Error::generic( - "apply_schema at top-level must be passed a struct schema", - )); - }; - let applied = apply_schema_to_struct(array, struct_schema)?; - Ok(applied.into()) -} - -// helper to transform an arrow field+col into the specified target type. If `rename` is specified -// the field will be renamed to the contained `str`. -fn new_field_with_metadata( - field_name: &str, - data_type: &ArrowDataType, - nullable: bool, - metadata: Option>, -) -> ArrowField { - let mut field = ArrowField::new(field_name, data_type.clone(), nullable); - if let Some(metadata) = metadata { - field.set_metadata(metadata); - }; - field -} - -// A helper that is a wrapper over `transform_field_and_col`. This will take apart the passed struct -// and use that method to transform each column and then put the struct back together. Target types -// and names for each column should be passed in `target_types_and_names`. The number of elements in -// the `target_types_and_names` iterator _must_ be the same as the number of columns in -// `struct_array`. The transformation is ordinal. That is, the order of fields in `target_fields` -// _must_ match the order of the columns in `struct_array`. -fn transform_struct( - struct_array: &StructArray, - target_fields: impl Iterator>, -) -> DeltaResult { - let (_, arrow_cols, nulls) = struct_array.clone().into_parts(); - let input_col_count = arrow_cols.len(); - let result_iter = - arrow_cols - .into_iter() - .zip(target_fields) - .map(|(sa_col, target_field)| -> DeltaResult<_> { - let target_field = target_field.borrow(); - let transformed_col = apply_schema_to(&sa_col, target_field.data_type())?; - let transformed_field = new_field_with_metadata( - &target_field.name, - transformed_col.data_type(), - target_field.nullable, - Some(target_field.metadata_with_string_values()), - ); - Ok((transformed_field, transformed_col)) - }); - let (transformed_fields, transformed_cols): (Vec, Vec) = - result_iter.process_results(|iter| iter.unzip())?; - if transformed_cols.len() != input_col_count { - return Err(Error::InternalError(format!( - "Passed struct had {input_col_count} columns, but transformed column has {}", - transformed_cols.len() - ))); - } - Ok(StructArray::try_new( - transformed_fields.into(), - transformed_cols, - nulls, - )?) -} - -// Transform a struct array. The data is in `array`, and the target fields are in `kernel_fields`. 
-fn apply_schema_to_struct(array: &dyn Array, kernel_fields: &Schema) -> DeltaResult { - let Some(sa) = array.as_struct_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a struct but isn't a StructArray", - )); - }; - transform_struct(sa, kernel_fields.fields()) -} - -// deconstruct the array, then rebuild the mapped version -fn apply_schema_to_list( - array: &dyn Array, - target_inner_type: &ArrayType, -) -> DeltaResult { - let Some(la) = array.as_list_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a list but isn't a ListArray", - )); - }; - let (field, offset_buffer, values, nulls) = la.clone().into_parts(); - - let transformed_values = apply_schema_to(&values, &target_inner_type.element_type)?; - let transformed_field = ArrowField::new( - field.name(), - transformed_values.data_type().clone(), - target_inner_type.contains_null, - ); - Ok(ListArray::try_new( - Arc::new(transformed_field), - offset_buffer, - transformed_values, - nulls, - )?) -} - -// deconstruct a map, and rebuild it with the specified target kernel type -fn apply_schema_to_map(array: &dyn Array, kernel_map_type: &MapType) -> DeltaResult { - let Some(ma) = array.as_map_opt() else { - return Err(make_arrow_error( - "Arrow claimed to be a map but isn't a MapArray", - )); - }; - let (map_field, offset_buffer, map_struct_array, nulls, ordered) = ma.clone().into_parts(); - let target_fields = map_struct_array - .fields() - .iter() - .zip([&kernel_map_type.key_type, &kernel_map_type.value_type]) - .zip([false, kernel_map_type.value_contains_null]) - .map(|((arrow_field, target_type), nullable)| { - StructField::new(arrow_field.name(), target_type.clone(), nullable) - }); - - // Arrow puts the key type/val as the first field/col and the value type/val as the second. So - // we just transform like a 'normal' struct, but we know there are two fields/cols and we - // specify the key/value types as the target type iterator. - let transformed_map_struct_array = transform_struct(&map_struct_array, target_fields)?; - - let transformed_map_field = ArrowField::new( - map_field.name().clone(), - transformed_map_struct_array.data_type().clone(), - map_field.is_nullable(), - ); - Ok(MapArray::try_new( - Arc::new(transformed_map_field), - offset_buffer, - transformed_map_struct_array, - nulls, - ordered, - )?) -} - -// apply `schema` to `array`. This handles renaming, and adjusting nullability and metadata. 
if the -// actual data types don't match, this will return an error -fn apply_schema_to(array: &ArrayRef, schema: &DataType) -> DeltaResult { - use DataType::*; - let array: ArrayRef = match schema { - Struct(stype) => Arc::new(apply_schema_to_struct(array, stype)?), - Array(atype) => Arc::new(apply_schema_to_list(array, atype)?), - Map(mtype) => Arc::new(apply_schema_to_map(array, mtype)?), - _ => { - ensure_data_types(schema, array.data_type(), true)?; - array.clone() - } - }; - Ok(array) -} - -#[derive(Debug)] -pub struct ArrowExpressionHandler; - -impl ExpressionHandler for ArrowExpressionHandler { - fn get_evaluator( - &self, - schema: SchemaRef, - expression: Expression, - output_type: DataType, - ) -> Arc { - Arc::new(DefaultExpressionEvaluator { - input_schema: schema, - expression: Box::new(expression), - output_type, - }) - } -} - -#[derive(Debug)] -pub struct DefaultExpressionEvaluator { - input_schema: SchemaRef, - expression: Box, - output_type: DataType, -} - -impl ExpressionEvaluator for DefaultExpressionEvaluator { - fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult> { - let batch = batch - .any_ref() - .downcast_ref::() - .ok_or_else(|| Error::engine_data_type("ArrowEngineData"))? - .record_batch(); - let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; - // TODO: make sure we have matching schemas for validation - // if batch.schema().as_ref() != &input_schema { - // return Err(Error::Generic(format!( - // "input schema does not match batch schema: {:?} != {:?}", - // input_schema, - // batch.schema() - // ))); - // }; - let array_ref = evaluate_expression(&self.expression, batch, Some(&self.output_type))?; - let batch: RecordBatch = if let DataType::Struct(_) = self.output_type { - apply_schema(&array_ref, &self.output_type)? - } else { - let array_ref = apply_schema_to(&array_ref, &self.output_type)?; - let arrow_type: ArrowDataType = ArrowDataType::try_from(&self.output_type)?; - let schema = ArrowSchema::new(vec![ArrowField::new("output", arrow_type, true)]); - RecordBatch::try_new(Arc::new(schema), vec![array_ref])? 
- }; - Ok(Box::new(ArrowEngineData::new(batch))) - } -} - -#[cfg(test)] -mod tests { - use std::ops::{Add, Div, Mul, Sub}; - - use arrow_array::{GenericStringArray, Int32Array}; - use arrow_buffer::ScalarBuffer; - use arrow_schema::{DataType, Field, Fields, Schema}; - - use super::*; - use crate::expressions::*; - use crate::schema::ArrayType; - use crate::DataType as DeltaDataTypes; - - #[test] - fn test_array_column() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - - let schema = Schema::new([arr_field.clone()]); - - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let not_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); - - let in_op = Expression::binary(BinaryOperator::In, 5, column_expr!("item")); - - let result = evaluate_expression(¬_op, &batch, None).unwrap(); - let expected = BooleanArray::from(vec![true, false, true]); - assert_eq!(result.as_ref(), &expected); - - let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![false, true, false]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_bad_right_type_array() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let schema = Schema::new([field.clone()]); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values.clone())]).unwrap(); - - let in_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); - - let in_result = evaluate_expression(&in_op, &batch, None); - - assert!(in_result.is_err()); - assert_eq!( - in_result.unwrap_err().to_string(), - "Invalid expression evaluation: Cannot cast to list array: Int32" - ); - } - - #[test] - fn test_literal_type_array() { - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let schema = Schema::new([field.clone()]); - let batch = RecordBatch::new_empty(Arc::new(schema)); - - let in_op = Expression::binary( - BinaryOperator::NotIn, - 5, - Scalar::Array(ArrayData::new( - ArrayType::new(DeltaDataTypes::INTEGER, false), - vec![Scalar::Integer(1), Scalar::Integer(2)], - )), - ); - - let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![true]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_invalid_array_sides() { - let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Int32, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - - let schema = Schema::new([arr_field.clone()]); - - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let in_op = Expression::binary( - BinaryOperator::NotIn, - column_expr!("item"), - column_expr!("item"), - ); - - let in_result = evaluate_expression(&in_op, &batch, None); - - assert!(in_result.is_err()); - assert_eq!( - in_result.unwrap_err().to_string(), - "Invalid expression evaluation: 
Invalid right value for (NOT) IN comparison, left is: Column(item) right is: Column(item)".to_string() - ) - } - - #[test] - fn test_str_arrays() { - let values = GenericStringArray::::from(vec![ - "hi", "bye", "hi", "hi", "bye", "bye", "hi", "bye", "hi", - ]); - let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); - let field = Arc::new(Field::new("item", DataType::Utf8, true)); - let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); - let schema = Schema::new([arr_field.clone()]); - let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); - - let str_not_op = Expression::binary(BinaryOperator::NotIn, "bye", column_expr!("item")); - - let str_in_op = Expression::binary(BinaryOperator::In, "hi", column_expr!("item")); - - let result = evaluate_expression(&str_in_op, &batch, None).unwrap(); - let expected = BooleanArray::from(vec![true, true, true]); - assert_eq!(result.as_ref(), &expected); - - let in_result = evaluate_expression(&str_not_op, &batch, None).unwrap(); - let in_expected = BooleanArray::from(vec![false, false, false]); - assert_eq!(in_result.as_ref(), &in_expected); - } - - #[test] - fn test_extract_column() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = - RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); - let column = column_expr!("a"); - - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - - let schema = Schema::new(vec![Field::new( - "b", - DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), - false, - )]); - - let struct_values: ArrayRef = Arc::new(values.clone()); - let struct_array = StructArray::from(vec![( - Arc::new(Field::new("a", DataType::Int32, false)), - struct_values, - )]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(struct_array.clone())], - ) - .unwrap(); - let column = column_expr!("b.a"); - let results = evaluate_expression(&column, &batch, None).unwrap(); - assert_eq!(results.as_ref(), &values); - } - - #[test] - fn test_binary_op_scalar() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = column_expr!("a"); - - let expression = column.clone().add(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().sub(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().mul(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - // TODO handle type casting - let expression = column.div(1); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); - assert_eq!(results.as_ref(), expected.as_ref()) - } - - #[test] - fn test_binary_op() { - let 
schema = Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Int32, false), - ]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(values.clone()), Arc::new(values)], - ) - .unwrap(); - let column_a = column_expr!("a"); - let column_b = column_expr!("b"); - - let expression = column_a.clone().add(column_b.clone()); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().sub(column_b.clone()); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![0, 0, 0])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().mul(column_b); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_binary_cmp() { - let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let values = Int32Array::from(vec![1, 2, 3]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); - let column = column_expr!("a"); - - let expression = column.clone().lt(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().lt_eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().gt(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().gt_eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().eq(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column.clone().ne(2); - let results = evaluate_expression(&expression, &batch, None).unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - } - - #[test] - fn test_logical() { - let schema = Schema::new(vec![ - Field::new("a", DataType::Boolean, false), - Field::new("b", DataType::Boolean, false), - ]); - let batch = RecordBatch::try_new( - Arc::new(schema.clone()), - vec![ - Arc::new(BooleanArray::from(vec![true, false])), - Arc::new(BooleanArray::from(vec![false, true])), - ], - ) - .unwrap(); - let column_a = column_expr!("a"); - let column_b = column_expr!("b"); - - let expression = column_a.clone().and(column_b.clone()); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![false, false])); - assert_eq!(results.as_ref(), 
expected.as_ref()); - - let expression = column_a.clone().and(true); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().or(column_b); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, true])); - assert_eq!(results.as_ref(), expected.as_ref()); - - let expression = column_a.clone().or(false); - let results = - evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)) - .unwrap(); - let expected = Arc::new(BooleanArray::from(vec![true, false])); - assert_eq!(results.as_ref(), expected.as_ref()); - } -} diff --git a/kernel/src/engine/arrow_expression/apply_schema.rs b/kernel/src/engine/arrow_expression/apply_schema.rs new file mode 100644 index 000000000..68fbb1438 --- /dev/null +++ b/kernel/src/engine/arrow_expression/apply_schema.rs @@ -0,0 +1,185 @@ +use std::borrow::Borrow; +use std::collections::HashMap; +use std::sync::Arc; + +use itertools::Itertools; + +use crate::arrow::array::{ + Array, ArrayRef, AsArray, ListArray, MapArray, RecordBatch, StructArray, +}; +use crate::arrow::datatypes::Schema as ArrowSchema; +use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; + +use super::super::arrow_utils::make_arrow_error; +use crate::engine::ensure_data_types::ensure_data_types; +use crate::error::{DeltaResult, Error}; +use crate::schema::{ArrayType, DataType, MapType, Schema, StructField}; + +// Apply a schema to an array. The array _must_ be a `StructArray`. Returns a `RecordBatch where the +// names of fields, nullable, and metadata in the struct have been transformed to match those in +// schema specified by `schema` +pub(crate) fn apply_schema(array: &dyn Array, schema: &DataType) -> DeltaResult { + let DataType::Struct(struct_schema) = schema else { + return Err(Error::generic( + "apply_schema at top-level must be passed a struct schema", + )); + }; + let applied = apply_schema_to_struct(array, struct_schema)?; + let (fields, columns, nulls) = applied.into_parts(); + if let Some(nulls) = nulls { + if nulls.null_count() != 0 { + return Err(Error::invalid_struct_data( + "Top-level nulls in struct are not supported", + )); + } + } + Ok(RecordBatch::try_new( + Arc::new(ArrowSchema::new(fields)), + columns, + )?) +} + +// helper to transform an arrow field+col into the specified target type. If `rename` is specified +// the field will be renamed to the contained `str`. +fn new_field_with_metadata( + field_name: &str, + data_type: &ArrowDataType, + nullable: bool, + metadata: Option>, +) -> ArrowField { + let mut field = ArrowField::new(field_name, data_type.clone(), nullable); + if let Some(metadata) = metadata { + field.set_metadata(metadata); + }; + field +} + +// A helper that is a wrapper over `transform_field_and_col`. This will take apart the passed struct +// and use that method to transform each column and then put the struct back together. Target types +// and names for each column should be passed in `target_types_and_names`. The number of elements in +// the `target_types_and_names` iterator _must_ be the same as the number of columns in +// `struct_array`. The transformation is ordinal. That is, the order of fields in `target_fields` +// _must_ match the order of the columns in `struct_array`. 
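
The ordinal pairing described above can be pictured on plain arrow types. A hedged sketch of the same idea (not part of the patch; `rename_ordinally` is a made-up name), before `transform_struct` below adds the recursive `apply_schema_to` call and metadata handling:

```rust
use crate::arrow::array::{Array, StructArray};
use crate::arrow::datatypes::Field;

/// Illustrative only: pair column i with target field i; data stays, the name and
/// nullability come from the target field.
fn rename_ordinally(input: &StructArray, target: &[Field]) -> StructArray {
    assert_eq!(
        input.num_columns(),
        target.len(),
        "ordinal mapping needs equal counts"
    );
    let (_, columns, nulls) = input.clone().into_parts();
    let fields: Vec<Field> = target
        .iter()
        .zip(&columns)
        .map(|(f, col)| Field::new(f.name(), col.data_type().clone(), f.is_nullable()))
        .collect();
    StructArray::try_new(fields.into(), columns, nulls).expect("lengths checked above")
}
```
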
+fn transform_struct( + struct_array: &StructArray, + target_fields: impl Iterator>, +) -> DeltaResult { + let (_, arrow_cols, nulls) = struct_array.clone().into_parts(); + let input_col_count = arrow_cols.len(); + let result_iter = + arrow_cols + .into_iter() + .zip(target_fields) + .map(|(sa_col, target_field)| -> DeltaResult<_> { + let target_field = target_field.borrow(); + let transformed_col = apply_schema_to(&sa_col, target_field.data_type())?; + let transformed_field = new_field_with_metadata( + &target_field.name, + transformed_col.data_type(), + target_field.nullable, + Some(target_field.metadata_with_string_values()), + ); + Ok((transformed_field, transformed_col)) + }); + let (transformed_fields, transformed_cols): (Vec, Vec) = + result_iter.process_results(|iter| iter.unzip())?; + if transformed_cols.len() != input_col_count { + return Err(Error::InternalError(format!( + "Passed struct had {input_col_count} columns, but transformed column has {}", + transformed_cols.len() + ))); + } + Ok(StructArray::try_new( + transformed_fields.into(), + transformed_cols, + nulls, + )?) +} + +// Transform a struct array. The data is in `array`, and the target fields are in `kernel_fields`. +fn apply_schema_to_struct(array: &dyn Array, kernel_fields: &Schema) -> DeltaResult { + let Some(sa) = array.as_struct_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a struct but isn't a StructArray", + )); + }; + transform_struct(sa, kernel_fields.fields()) +} + +// deconstruct the array, then rebuild the mapped version +fn apply_schema_to_list( + array: &dyn Array, + target_inner_type: &ArrayType, +) -> DeltaResult { + let Some(la) = array.as_list_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a list but isn't a ListArray", + )); + }; + let (field, offset_buffer, values, nulls) = la.clone().into_parts(); + + let transformed_values = apply_schema_to(&values, &target_inner_type.element_type)?; + let transformed_field = ArrowField::new( + field.name(), + transformed_values.data_type().clone(), + target_inner_type.contains_null, + ); + Ok(ListArray::try_new( + Arc::new(transformed_field), + offset_buffer, + transformed_values, + nulls, + )?) +} + +// deconstruct a map, and rebuild it with the specified target kernel type +fn apply_schema_to_map(array: &dyn Array, kernel_map_type: &MapType) -> DeltaResult { + let Some(ma) = array.as_map_opt() else { + return Err(make_arrow_error( + "Arrow claimed to be a map but isn't a MapArray", + )); + }; + let (map_field, offset_buffer, map_struct_array, nulls, ordered) = ma.clone().into_parts(); + let target_fields = map_struct_array + .fields() + .iter() + .zip([&kernel_map_type.key_type, &kernel_map_type.value_type]) + .zip([false, kernel_map_type.value_contains_null]) + .map(|((arrow_field, target_type), nullable)| { + StructField::new(arrow_field.name(), target_type.clone(), nullable) + }); + + // Arrow puts the key type/val as the first field/col and the value type/val as the second. So + // we just transform like a 'normal' struct, but we know there are two fields/cols and we + // specify the key/value types as the target type iterator. + let transformed_map_struct_array = transform_struct(&map_struct_array, target_fields)?; + + let transformed_map_field = ArrowField::new( + map_field.name().clone(), + transformed_map_struct_array.data_type().clone(), + map_field.is_nullable(), + ); + Ok(MapArray::try_new( + Arc::new(transformed_map_field), + offset_buffer, + transformed_map_struct_array, + nulls, + ordered, + )?) 
+} + +// apply `schema` to `array`. This handles renaming, and adjusting nullability and metadata. if the +// actual data types don't match, this will return an error +pub(crate) fn apply_schema_to(array: &ArrayRef, schema: &DataType) -> DeltaResult { + use DataType::*; + let array: ArrayRef = match schema { + Struct(stype) => Arc::new(apply_schema_to_struct(array, stype)?), + Array(atype) => Arc::new(apply_schema_to_list(array, atype)?), + Map(mtype) => Arc::new(apply_schema_to_map(array, mtype)?), + _ => { + ensure_data_types(schema, array.data_type(), true)?; + array.clone() + } + }; + Ok(array) +} diff --git a/kernel/src/engine/arrow_expression/evaluate_expression.rs b/kernel/src/engine/arrow_expression/evaluate_expression.rs new file mode 100644 index 000000000..3a2876f78 --- /dev/null +++ b/kernel/src/engine/arrow_expression/evaluate_expression.rs @@ -0,0 +1,247 @@ +//! Expression handling based on arrow-rs compute kernels. +use crate::arrow::array::types::*; +use crate::arrow::array::{ + Array, ArrayRef, AsArray, BooleanArray, Datum, RecordBatch, StructArray, +}; +use crate::arrow::compute::kernels::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; +use crate::arrow::compute::kernels::comparison::in_list_utf8; +use crate::arrow::compute::kernels::numeric::{add, div, mul, sub}; +use crate::arrow::compute::{and_kleene, is_null, not, or_kleene}; +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, IntervalUnit, TimeUnit, +}; +use crate::arrow::error::ArrowError; +use crate::engine::arrow_utils::prim_array_cmp; +use crate::error::{DeltaResult, Error}; +use crate::expressions::{ + BinaryExpression, BinaryOperator, Expression, Scalar, UnaryExpression, UnaryOperator, + VariadicExpression, VariadicOperator, +}; +use crate::schema::DataType; +use itertools::Itertools; +use std::sync::Arc; + +fn downcast_to_bool(arr: &dyn Array) -> DeltaResult<&BooleanArray> { + arr.as_any() + .downcast_ref::() + .ok_or_else(|| Error::generic("expected boolean array")) +} + +fn wrap_comparison_result(arr: BooleanArray) -> ArrayRef { + Arc::new(arr) as _ +} + +trait ProvidesColumnByName { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef>; +} + +impl ProvidesColumnByName for RecordBatch { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { + self.column_by_name(name) + } +} + +impl ProvidesColumnByName for StructArray { + fn column_by_name(&self, name: &str) -> Option<&ArrayRef> { + self.column_by_name(name) + } +} + +// Given a RecordBatch or StructArray, recursively probe for a nested column path and return the +// corresponding column, or Err if the path is invalid. For example, given the following schema: +// ```text +// root: { +// a: int32, +// b: struct { +// c: int32, +// d: struct { +// e: int32, +// f: int64, +// }, +// }, +// } +// ``` +// The path ["b", "d", "f"] would retrieve the int64 column while ["a", "b"] would produce an error. 
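
As a rough illustration of the path walk described above (not part of the patch; `get_nested` and the field names are made up), the same recursion can be written directly against a `RecordBatch`, returning `None` where `extract_column` below reports a `SchemaError`:

```rust
use std::sync::Arc;

use crate::arrow::array::{Array, ArrayRef, AsArray, Int64Array, RecordBatch, StructArray};
use crate::arrow::datatypes::{DataType, Field, Fields, Schema};

/// Walk a nested path through struct columns; None if any step is missing or not a struct.
fn get_nested(batch: &RecordBatch, path: &[&str]) -> Option<ArrayRef> {
    let (first, rest) = path.split_first()?;
    let mut col = batch.column_by_name(first)?.clone();
    for name in rest {
        col = col.as_struct_opt()?.column_by_name(name)?.clone();
    }
    Some(col)
}

fn demo() {
    // root: { b: struct { f: int64 } } -- mirrors the ["b", ..., "f"] example above.
    let f = Arc::new(Int64Array::from(vec![10, 20])) as ArrayRef;
    let b = StructArray::new(
        Fields::from(vec![Field::new("f", DataType::Int64, false)]),
        vec![f],
        None,
    );
    let schema = Arc::new(Schema::new(vec![Field::new("b", b.data_type().clone(), false)]));
    let batch = RecordBatch::try_new(schema, vec![Arc::new(b)]).unwrap();
    assert_eq!(get_nested(&batch, &["b", "f"]).unwrap().len(), 2);
}
```
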
+fn extract_column(mut parent: &dyn ProvidesColumnByName, col: &[String]) -> DeltaResult { + let mut field_names = col.iter(); + let Some(mut field_name) = field_names.next() else { + return Err(ArrowError::SchemaError("Empty column path".to_string()))?; + }; + loop { + let child = parent + .column_by_name(field_name) + .ok_or_else(|| ArrowError::SchemaError(format!("No such field: {field_name}")))?; + field_name = match field_names.next() { + Some(name) => name, + None => return Ok(child.clone()), + }; + parent = child + .as_any() + .downcast_ref::() + .ok_or_else(|| ArrowError::SchemaError(format!("Not a struct: {field_name}")))?; + } +} + +pub(crate) fn evaluate_expression( + expression: &Expression, + batch: &RecordBatch, + result_type: Option<&DataType>, +) -> DeltaResult { + use BinaryOperator::*; + use Expression::*; + match (expression, result_type) { + (Literal(scalar), _) => Ok(scalar.to_array(batch.num_rows())?), + (Column(name), _) => extract_column(batch, name), + (Struct(fields), Some(DataType::Struct(output_schema))) => { + let columns = fields + .iter() + .zip(output_schema.fields()) + .map(|(expr, field)| evaluate_expression(expr, batch, Some(field.data_type()))); + let output_cols: Vec = columns.try_collect()?; + let output_fields: Vec = output_cols + .iter() + .zip(output_schema.fields()) + .map(|(output_col, output_field)| -> DeltaResult<_> { + Ok(ArrowField::new( + output_field.name(), + output_col.data_type().clone(), + output_col.is_nullable(), + )) + }) + .try_collect()?; + let result = StructArray::try_new(output_fields.into(), output_cols, None)?; + Ok(Arc::new(result)) + } + (Struct(_), _) => Err(Error::generic( + "Data type is required to evaluate struct expressions", + )), + (Unary(UnaryExpression { op, expr }), _) => { + let arr = evaluate_expression(expr.as_ref(), batch, None)?; + Ok(match op { + UnaryOperator::Not => Arc::new(not(downcast_to_bool(&arr)?)?), + UnaryOperator::IsNull => Arc::new(is_null(&arr)?), + }) + } + ( + Binary(BinaryExpression { + op: In, + left, + right, + }), + _, + ) => match (left.as_ref(), right.as_ref()) { + (Literal(_), Column(_)) => { + let left_arr = evaluate_expression(left.as_ref(), batch, None)?; + let right_arr = evaluate_expression(right.as_ref(), batch, None)?; + if let Some(string_arr) = left_arr.as_string_opt::() { + if let Some(right_arr) = right_arr.as_list_opt::() { + return in_list_utf8(string_arr, right_arr) + .map(wrap_comparison_result) + .map_err(Error::generic_err); + } + } + prim_array_cmp! 
{ + left_arr, right_arr, + (ArrowDataType::Int8, Int8Type), + (ArrowDataType::Int16, Int16Type), + (ArrowDataType::Int32, Int32Type), + (ArrowDataType::Int64, Int64Type), + (ArrowDataType::UInt8, UInt8Type), + (ArrowDataType::UInt16, UInt16Type), + (ArrowDataType::UInt32, UInt32Type), + (ArrowDataType::UInt64, UInt64Type), + (ArrowDataType::Float16, Float16Type), + (ArrowDataType::Float32, Float32Type), + (ArrowDataType::Float64, Float64Type), + (ArrowDataType::Timestamp(TimeUnit::Second, _), TimestampSecondType), + (ArrowDataType::Timestamp(TimeUnit::Millisecond, _), TimestampMillisecondType), + (ArrowDataType::Timestamp(TimeUnit::Microsecond, _), TimestampMicrosecondType), + (ArrowDataType::Timestamp(TimeUnit::Nanosecond, _), TimestampNanosecondType), + (ArrowDataType::Date32, Date32Type), + (ArrowDataType::Date64, Date64Type), + (ArrowDataType::Time32(TimeUnit::Second), Time32SecondType), + (ArrowDataType::Time32(TimeUnit::Millisecond), Time32MillisecondType), + (ArrowDataType::Time64(TimeUnit::Microsecond), Time64MicrosecondType), + (ArrowDataType::Time64(TimeUnit::Nanosecond), Time64NanosecondType), + (ArrowDataType::Duration(TimeUnit::Second), DurationSecondType), + (ArrowDataType::Duration(TimeUnit::Millisecond), DurationMillisecondType), + (ArrowDataType::Duration(TimeUnit::Microsecond), DurationMicrosecondType), + (ArrowDataType::Duration(TimeUnit::Nanosecond), DurationNanosecondType), + (ArrowDataType::Interval(IntervalUnit::DayTime), IntervalDayTimeType), + (ArrowDataType::Interval(IntervalUnit::YearMonth), IntervalYearMonthType), + (ArrowDataType::Interval(IntervalUnit::MonthDayNano), IntervalMonthDayNanoType), + (ArrowDataType::Decimal128(_, _), Decimal128Type), + (ArrowDataType::Decimal256(_, _), Decimal256Type) + } + } + (Literal(lit), Literal(Scalar::Array(ad))) => { + #[allow(deprecated)] + let exists = ad.array_elements().contains(lit); + Ok(Arc::new(BooleanArray::from(vec![exists]))) + } + (l, r) => Err(Error::invalid_expression(format!( + "Invalid right value for (NOT) IN comparison, left is: {l} right is: {r}" + ))), + }, + ( + Binary(BinaryExpression { + op: NotIn, + left, + right, + }), + _, + ) => { + let reverse_op = Expression::binary(In, *left.clone(), *right.clone()); + let reverse_expr = evaluate_expression(&reverse_op, batch, None)?; + not(reverse_expr.as_boolean()) + .map(wrap_comparison_result) + .map_err(Error::generic_err) + } + (Binary(BinaryExpression { op, left, right }), _) => { + let left_arr = evaluate_expression(left.as_ref(), batch, None)?; + let right_arr = evaluate_expression(right.as_ref(), batch, None)?; + + type Operation = fn(&dyn Datum, &dyn Datum) -> Result; + let eval: Operation = match op { + Plus => add, + Minus => sub, + Multiply => mul, + Divide => div, + LessThan => |l, r| lt(l, r).map(wrap_comparison_result), + LessThanOrEqual => |l, r| lt_eq(l, r).map(wrap_comparison_result), + GreaterThan => |l, r| gt(l, r).map(wrap_comparison_result), + GreaterThanOrEqual => |l, r| gt_eq(l, r).map(wrap_comparison_result), + Equal => |l, r| eq(l, r).map(wrap_comparison_result), + NotEqual => |l, r| neq(l, r).map(wrap_comparison_result), + Distinct => |l, r| distinct(l, r).map(wrap_comparison_result), + // NOTE: [Not]In was already covered above + In | NotIn => return Err(Error::generic("Invalid expression given")), + }; + + eval(&left_arr, &right_arr).map_err(Error::generic_err) + } + (Variadic(VariadicExpression { op, exprs }), None | Some(&DataType::BOOLEAN)) => { + type Operation = fn(&BooleanArray, &BooleanArray) -> Result; + let (reducer, 
default): (Operation, _) = match op { + VariadicOperator::And => (and_kleene, true), + VariadicOperator::Or => (or_kleene, false), + }; + exprs + .iter() + .map(|expr| evaluate_expression(expr, batch, result_type)) + .reduce(|l, r| { + Ok(reducer(downcast_to_bool(&l?)?, downcast_to_bool(&r?)?) + .map(wrap_comparison_result)?) + }) + .unwrap_or_else(|| { + evaluate_expression(&Expression::literal(default), batch, result_type) + }) + } + (Variadic(_), _) => { + // NOTE: Update this error message if we add support for variadic operations on other types + Err(Error::Generic(format!( + "Variadic {expression:?} is expected to return boolean results, got {result_type:?}" + ))) + } + } +} diff --git a/kernel/src/engine/arrow_expression/mod.rs b/kernel/src/engine/arrow_expression/mod.rs new file mode 100644 index 000000000..019531931 --- /dev/null +++ b/kernel/src/engine/arrow_expression/mod.rs @@ -0,0 +1,194 @@ +//! Expression handling based on arrow-rs compute kernels. +use std::sync::Arc; + +use crate::arrow::array::{ + Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Decimal128Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, RecordBatch, + StringArray, StructArray, TimestampMicrosecondArray, +}; +use crate::arrow::buffer::OffsetBuffer; +use crate::arrow::compute::concat; +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, +}; + +use super::arrow_conversion::LIST_ARRAY_ROOT; +use crate::engine::arrow_data::ArrowEngineData; +use crate::error::{DeltaResult, Error}; +use crate::expressions::{Expression, Scalar}; +use crate::schema::{DataType, PrimitiveType, SchemaRef}; +use crate::{EngineData, EvaluationHandler, ExpressionEvaluator}; + +use itertools::Itertools; +use tracing::debug; + +use apply_schema::{apply_schema, apply_schema_to}; +use evaluate_expression::evaluate_expression; + +mod apply_schema; +mod evaluate_expression; + +#[cfg(test)] +mod tests; + +// TODO leverage scalars / Datum + +impl Scalar { + /// Convert scalar to arrow array. + pub fn to_array(&self, num_rows: usize) -> DeltaResult { + use Scalar::*; + let arr: ArrayRef = match self { + Integer(val) => Arc::new(Int32Array::from_value(*val, num_rows)), + Long(val) => Arc::new(Int64Array::from_value(*val, num_rows)), + Short(val) => Arc::new(Int16Array::from_value(*val, num_rows)), + Byte(val) => Arc::new(Int8Array::from_value(*val, num_rows)), + Float(val) => Arc::new(Float32Array::from_value(*val, num_rows)), + Double(val) => Arc::new(Float64Array::from_value(*val, num_rows)), + String(val) => Arc::new(StringArray::from(vec![val.clone(); num_rows])), + Boolean(val) => Arc::new(BooleanArray::from(vec![*val; num_rows])), + Timestamp(val) => { + Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows).with_timezone("UTC")) + } + TimestampNtz(val) => Arc::new(TimestampMicrosecondArray::from_value(*val, num_rows)), + Date(val) => Arc::new(Date32Array::from_value(*val, num_rows)), + Binary(val) => Arc::new(BinaryArray::from(vec![val.as_slice(); num_rows])), + Decimal(val, precision, scale) => Arc::new( + Decimal128Array::from_value(*val, num_rows) + .with_precision_and_scale(*precision, *scale as i8)?, + ), + Struct(data) => { + let arrays = data + .values() + .iter() + .map(|val| val.to_array(num_rows)) + .try_collect()?; + let fields: Fields = data + .fields() + .iter() + .map(ArrowField::try_from) + .try_collect()?; + Arc::new(StructArray::try_new(fields, arrays, None)?) 
+ } + Array(data) => { + #[allow(deprecated)] + let values = data.array_elements(); + let vecs: Vec<_> = values.iter().map(|v| v.to_array(num_rows)).try_collect()?; + let values: Vec<_> = vecs.iter().map(|x| x.as_ref()).collect(); + let offsets: Vec<_> = vecs.iter().map(|v| v.len()).collect(); + let offset_buffer = OffsetBuffer::from_lengths(offsets); + let field = ArrowField::try_from(data.array_type())?; + Arc::new(ListArray::new( + Arc::new(field), + offset_buffer, + concat(values.as_slice())?, + None, + )) + } + Null(DataType::BYTE) => Arc::new(Int8Array::new_null(num_rows)), + Null(DataType::SHORT) => Arc::new(Int16Array::new_null(num_rows)), + Null(DataType::INTEGER) => Arc::new(Int32Array::new_null(num_rows)), + Null(DataType::LONG) => Arc::new(Int64Array::new_null(num_rows)), + Null(DataType::FLOAT) => Arc::new(Float32Array::new_null(num_rows)), + Null(DataType::DOUBLE) => Arc::new(Float64Array::new_null(num_rows)), + Null(DataType::STRING) => Arc::new(StringArray::new_null(num_rows)), + Null(DataType::BOOLEAN) => Arc::new(BooleanArray::new_null(num_rows)), + Null(DataType::TIMESTAMP) => { + Arc::new(TimestampMicrosecondArray::new_null(num_rows).with_timezone("UTC")) + } + Null(DataType::TIMESTAMP_NTZ) => { + Arc::new(TimestampMicrosecondArray::new_null(num_rows)) + } + Null(DataType::DATE) => Arc::new(Date32Array::new_null(num_rows)), + Null(DataType::BINARY) => Arc::new(BinaryArray::new_null(num_rows)), + Null(DataType::Primitive(PrimitiveType::Decimal(precision, scale))) => Arc::new( + Decimal128Array::new_null(num_rows) + .with_precision_and_scale(*precision, *scale as i8)?, + ), + Null(DataType::Struct(t)) => { + let fields: Fields = t.fields().map(ArrowField::try_from).try_collect()?; + Arc::new(StructArray::new_null(fields, num_rows)) + } + Null(DataType::Array(t)) => { + let field = ArrowField::new(LIST_ARRAY_ROOT, t.element_type().try_into()?, true); + Arc::new(ListArray::new_null(Arc::new(field), num_rows)) + } + Null(DataType::Map { .. }) => { + return Err(Error::unsupported( + "Scalar::to_array does not yet support Map types", + )); + } + }; + Ok(arr) + } +} + +#[derive(Debug)] +pub struct ArrowEvaluationHandler; + +impl EvaluationHandler for ArrowEvaluationHandler { + fn new_expression_evaluator( + &self, + schema: SchemaRef, + expression: Expression, + output_type: DataType, + ) -> Arc { + Arc::new(DefaultExpressionEvaluator { + input_schema: schema, + expression: Box::new(expression), + output_type, + }) + } + + /// Create a single-row array with all-null leaf values. Note that if a nested struct is + /// included in the `output_type`, the entire struct will be NULL (instead of a not-null struct + /// with NULL fields). + fn null_row(&self, output_schema: SchemaRef) -> DeltaResult> { + let fields = output_schema.fields(); + let arrays = fields + .map(|field| Scalar::Null(field.data_type().clone()).to_array(1)) + .try_collect()?; + let record_batch = + RecordBatch::try_new(Arc::new(output_schema.as_ref().try_into()?), arrays)?; + Ok(Box::new(ArrowEngineData::new(record_batch))) + } +} + +#[derive(Debug)] +pub struct DefaultExpressionEvaluator { + input_schema: SchemaRef, + expression: Box, + output_type: DataType, +} + +impl ExpressionEvaluator for DefaultExpressionEvaluator { + fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult> { + debug!( + "Arrow evaluator evaluating: {:#?}", + self.expression.as_ref() + ); + let batch = batch + .any_ref() + .downcast_ref::() + .ok_or_else(|| Error::engine_data_type("ArrowEngineData"))? 
+ .record_batch(); + let _input_schema: ArrowSchema = self.input_schema.as_ref().try_into()?; + // TODO: make sure we have matching schemas for validation + // if batch.schema().as_ref() != &input_schema { + // return Err(Error::Generic(format!( + // "input schema does not match batch schema: {:?} != {:?}", + // input_schema, + // batch.schema() + // ))); + // }; + let array_ref = evaluate_expression(&self.expression, batch, Some(&self.output_type))?; + let batch: RecordBatch = if let DataType::Struct(_) = self.output_type { + apply_schema(&array_ref, &self.output_type)? + } else { + let array_ref = apply_schema_to(&array_ref, &self.output_type)?; + let arrow_type: ArrowDataType = ArrowDataType::try_from(&self.output_type)?; + let schema = ArrowSchema::new(vec![ArrowField::new("output", arrow_type, true)]); + RecordBatch::try_new(Arc::new(schema), vec![array_ref])? + }; + Ok(Box::new(ArrowEngineData::new(batch))) + } +} diff --git a/kernel/src/engine/arrow_expression/tests.rs b/kernel/src/engine/arrow_expression/tests.rs new file mode 100644 index 000000000..586e5cdb7 --- /dev/null +++ b/kernel/src/engine/arrow_expression/tests.rs @@ -0,0 +1,502 @@ +use std::ops::{Add, Div, Mul, Sub}; + +use crate::arrow::array::{ + create_array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, ListArray, StructArray, +}; +use crate::arrow::buffer::{OffsetBuffer, ScalarBuffer}; +use crate::arrow::datatypes::{DataType, Field, Fields, Schema}; + +use super::*; +use crate::expressions::*; +use crate::schema::{ArrayType, StructField, StructType}; +use crate::DataType as DeltaDataTypes; +use crate::EvaluationHandlerExtension as _; + +#[test] +fn test_array_column() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + + let schema = Schema::new([arr_field.clone()]); + + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let not_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); + + let in_op = Expression::binary(BinaryOperator::In, 5, column_expr!("item")); + + let result = evaluate_expression(¬_op, &batch, None).unwrap(); + let expected = BooleanArray::from(vec![true, false, true]); + assert_eq!(result.as_ref(), &expected); + + let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![false, true, false]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_bad_right_type_array() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let schema = Schema::new([field.clone()]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(values.clone())]).unwrap(); + + let in_op = Expression::binary(BinaryOperator::NotIn, 5, column_expr!("item")); + + let in_result = evaluate_expression(&in_op, &batch, None); + + assert!(in_result.is_err()); + assert_eq!( + in_result.unwrap_err().to_string(), + "Invalid expression evaluation: Cannot cast to list array: Int32" + ); +} + +#[test] +fn test_literal_type_array() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let schema = Schema::new([field.clone()]); + let batch = 
RecordBatch::new_empty(Arc::new(schema)); + + let in_op = Expression::binary( + BinaryOperator::NotIn, + 5, + Scalar::Array(ArrayData::new( + ArrayType::new(DeltaDataTypes::INTEGER, false), + vec![Scalar::Integer(1), Scalar::Integer(2)], + )), + ); + + let in_result = evaluate_expression(&in_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![true]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_invalid_array_sides() { + let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + + let schema = Schema::new([arr_field.clone()]); + + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let in_op = Expression::binary( + BinaryOperator::NotIn, + column_expr!("item"), + column_expr!("item"), + ); + + let in_result = evaluate_expression(&in_op, &batch, None); + + assert!(in_result.is_err()); + assert_eq!( + in_result.unwrap_err().to_string(), + "Invalid expression evaluation: Invalid right value for (NOT) IN comparison, left is: Column(item) right is: Column(item)".to_string() + ) +} + +#[test] +fn test_str_arrays() { + let values = GenericStringArray::::from(vec![ + "hi", "bye", "hi", "hi", "bye", "bye", "hi", "bye", "hi", + ]); + let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 9])); + let field = Arc::new(Field::new("item", DataType::Utf8, true)); + let arr_field = Arc::new(Field::new("item", DataType::List(field.clone()), true)); + let schema = Schema::new([arr_field.clone()]); + let array = ListArray::new(field.clone(), offsets, Arc::new(values), None); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array.clone())]).unwrap(); + + let str_not_op = Expression::binary(BinaryOperator::NotIn, "bye", column_expr!("item")); + + let str_in_op = Expression::binary(BinaryOperator::In, "hi", column_expr!("item")); + + let result = evaluate_expression(&str_in_op, &batch, None).unwrap(); + let expected = BooleanArray::from(vec![true, true, true]); + assert_eq!(result.as_ref(), &expected); + + let in_result = evaluate_expression(&str_not_op, &batch, None).unwrap(); + let in_expected = BooleanArray::from(vec![false, false, false]); + assert_eq!(in_result.as_ref(), &in_expected); +} + +#[test] +fn test_extract_column() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = + RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values.clone())]).unwrap(); + let column = column_expr!("a"); + + let results = evaluate_expression(&column, &batch, None).unwrap(); + assert_eq!(results.as_ref(), &values); + + let schema = Schema::new(vec![Field::new( + "b", + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), + false, + )]); + + let struct_values: ArrayRef = Arc::new(values.clone()); + let struct_array = StructArray::from(vec![( + Arc::new(Field::new("a", DataType::Int32, false)), + struct_values, + )]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(struct_array.clone())], + ) + .unwrap(); + let column = column_expr!("b.a"); + let results = evaluate_expression(&column, &batch, None).unwrap(); + assert_eq!(results.as_ref(), &values); +} + 
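// Editorial sketch, not part of this patch: a test-style example of the `apply_schema_to`
// helper added in apply_schema.rs above, showing how it renames a struct field and adjusts
// nullability to match a target kernel schema. It assumes the imports of this test module;
// the field names "before" and "after" are illustrative only.
#[test]
fn apply_schema_to_rename_sketch() {
    // Arrow input: one int32 column under the name "before", not nullable.
    let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let input: ArrayRef = Arc::new(StructArray::from(vec![(
        Arc::new(Field::new("before", DataType::Int32, false)),
        values,
    )]));
    // Target kernel schema: same data type, new name "after", nullable. Data types must still
    // match, otherwise `apply_schema_to` returns an error.
    let target = DeltaDataTypes::struct_type([StructField::nullable(
        "after",
        DeltaDataTypes::INTEGER,
    )]);
    let result = apply_schema_to(&input, &target).unwrap();
    let renamed = result.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(renamed.column_names(), vec!["after"]);
    assert!(renamed.fields()[0].is_nullable());
}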
+#[test] +fn test_binary_op_scalar() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); + let column = column_expr!("a"); + + let expression = column.clone().add(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 3, 4])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().sub(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![0, 1, 2])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().mul(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); + assert_eq!(results.as_ref(), expected.as_ref()); + + // TODO handle type casting + let expression = column.div(1); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![1, 2, 3])); + assert_eq!(results.as_ref(), expected.as_ref()) +} + +#[test] +fn test_binary_op() { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + ]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(values.clone()), Arc::new(values)], + ) + .unwrap(); + let column_a = column_expr!("a"); + let column_b = column_expr!("b"); + + let expression = column_a.clone().add(column_b.clone()); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![2, 4, 6])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().sub(column_b.clone()); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![0, 0, 0])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().mul(column_b); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(Int32Array::from(vec![1, 4, 9])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_binary_cmp() { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let values = Int32Array::from(vec![1, 2, 3]); + let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(values)]).unwrap(); + let column = column_expr!("a"); + + let expression = column.clone().lt(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().lt_eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().gt(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, false, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().gt_eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, 
true, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().eq(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column.clone().ne(2); + let results = evaluate_expression(&expression, &batch, None).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false, true])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_logical() { + let schema = Schema::new(vec![ + Field::new("a", DataType::Boolean, false), + Field::new("b", DataType::Boolean, false), + ]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![ + Arc::new(BooleanArray::from(vec![true, false])), + Arc::new(BooleanArray::from(vec![false, true])), + ], + ) + .unwrap(); + let column_a = column_expr!("a"); + let column_b = column_expr!("b"); + + let expression = column_a.clone().and(column_b.clone()); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![false, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().and(true); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().or(column_b); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, true])); + assert_eq!(results.as_ref(), expected.as_ref()); + + let expression = column_a.clone().or(false); + let results = + evaluate_expression(&expression, &batch, Some(&crate::schema::DataType::BOOLEAN)).unwrap(); + let expected = Arc::new(BooleanArray::from(vec![true, false])); + assert_eq!(results.as_ref(), expected.as_ref()); +} + +#[test] +fn test_null_row() { + // note that we _allow_ nested nulls, since the top-level struct can be NULL + let schema = Arc::new(StructType::new(vec![ + StructField::nullable( + "x", + StructType::new([ + StructField::nullable("a", crate::schema::DataType::INTEGER), + StructField::not_null("b", crate::schema::DataType::STRING), + ]), + ), + StructField::nullable("c", crate::schema::DataType::STRING), + ])); + let handler = ArrowEvaluationHandler; + let result = handler.null_row(schema.clone()).unwrap(); + let expected = RecordBatch::try_new( + Arc::new(schema.as_ref().try_into().unwrap()), + vec![ + Arc::new(StructArray::new_null( + [ + Arc::new(Field::new("a", DataType::Int32, true)), + Arc::new(Field::new("b", DataType::Utf8, false)), + ] + .into(), + 1, + )), + create_array!(Utf8, [None::]), + ], + ) + .unwrap(); + let result: RecordBatch = result + .into_any() + .downcast::() + .unwrap() + .into(); + assert_eq!(result, expected); +} + +#[test] +fn test_null_row_err() { + let not_null_schema = Arc::new(StructType::new(vec![StructField::not_null( + "a", + crate::schema::DataType::STRING, + )])); + let handler = ArrowEvaluationHandler; + assert!(handler.null_row(not_null_schema).is_err()); +} + +// helper to take values/schema to pass to `create_one` and assert the result = expected +fn assert_create_one(values: &[Scalar], schema: SchemaRef, expected: RecordBatch) { + let handler = ArrowEvaluationHandler; + let actual = 
handler.create_one(schema, values).unwrap(); + let actual_rb: RecordBatch = actual + .into_any() + .downcast::() + .unwrap() + .into(); + assert_eq!(actual_rb, expected); +} + +#[test] +fn test_create_one() { + let values: &[Scalar] = &[ + 1.into(), + "B".into(), + 3.into(), + Scalar::Null(DeltaDataTypes::INTEGER), + ]; + let schema = Arc::new(StructType::new([ + StructField::nullable("a", DeltaDataTypes::INTEGER), + StructField::nullable("b", DeltaDataTypes::STRING), + StructField::not_null("c", DeltaDataTypes::INTEGER), + StructField::nullable("d", DeltaDataTypes::INTEGER), + ])); + + let expected_schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Utf8, true), + Field::new("c", DataType::Int32, false), + Field::new("d", DataType::Int32, true), + ])); + let expected = RecordBatch::try_new( + expected_schema, + vec![ + create_array!(Int32, [1]), + create_array!(Utf8, ["B"]), + create_array!(Int32, [3]), + create_array!(Int32, [None]), + ], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_nested() { + let values: &[Scalar] = &[1.into(), 2.into()]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::nullable("b", DeltaDataTypes::INTEGER), + StructField::not_null("c", DeltaDataTypes::INTEGER), + ]), + )])); + let expected_schema = Arc::new(Schema::new(vec![Field::new( + "a", + DataType::Struct( + vec![ + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, false), + ] + .into(), + ), + false, + )])); + let expected = RecordBatch::try_new( + expected_schema, + vec![Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Int32, true)), + create_array!(Int32, [1]) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + create_array!(Int32, [2]) as ArrayRef, + ), + ]))], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_nested_null() { + let values: &[Scalar] = &[Scalar::Null(DeltaDataTypes::INTEGER), 1.into()]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::nullable("b", DeltaDataTypes::INTEGER), + StructField::not_null("c", DeltaDataTypes::INTEGER), + ]), + )])); + let expected_schema = Arc::new(Schema::new(vec![Field::new( + "a", + DataType::Struct( + vec![ + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, false), + ] + .into(), + ), + false, + )])); + let expected = RecordBatch::try_new( + expected_schema, + vec![Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Int32, true)), + create_array!(Int32, [None]) as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + create_array!(Int32, [1]) as ArrayRef, + ), + ]))], + ) + .unwrap(); + assert_create_one(values, schema, expected); +} + +#[test] +fn test_create_one_not_null_struct() { + let values: &[Scalar] = &[ + Scalar::Null(DeltaDataTypes::INTEGER), + Scalar::Null(DeltaDataTypes::INTEGER), + ]; + let schema = Arc::new(StructType::new([StructField::not_null( + "a", + DeltaDataTypes::struct_type([ + StructField::not_null("b", DeltaDataTypes::INTEGER), + StructField::nullable("c", DeltaDataTypes::INTEGER), + ]), + )])); + let handler = ArrowEvaluationHandler; + assert!(handler.create_one(schema, values).is_err()); +} + +#[test] +fn test_create_one_top_level_null() { + let values = &[Scalar::Null(DeltaDataTypes::INTEGER)]; + let 
handler = ArrowEvaluationHandler; + + let schema = Arc::new(StructType::new([StructField::not_null( + "col_1", + DeltaDataTypes::INTEGER, + )])); + assert!(matches!( + handler.create_one(schema, values), + Err(Error::InvalidStructData(_)) + )); +} diff --git a/kernel/src/engine/arrow_get_data.rs b/kernel/src/engine/arrow_get_data.rs index 145aab66b..fbed64df1 100644 --- a/kernel/src/engine/arrow_get_data.rs +++ b/kernel/src/engine/arrow_get_data.rs @@ -1,4 +1,4 @@ -use arrow_array::{ +use crate::arrow::array::{ types::{GenericStringType, Int32Type, Int64Type}, Array, BooleanArray, GenericByteArray, GenericListArray, MapArray, OffsetSizeTrait, PrimitiveArray, diff --git a/kernel/src/engine/arrow_utils.rs b/kernel/src/engine/arrow_utils.rs index 06441b9d4..749f1399c 100644 --- a/kernel/src/engine/arrow_utils.rs +++ b/kernel/src/engine/arrow_utils.rs @@ -12,18 +12,19 @@ use crate::{ DeltaResult, EngineData, Error, }; -use arrow_array::{ - cast::AsArray, new_null_array, Array as ArrowArray, GenericListArray, OffsetSizeTrait, - RecordBatch, StringArray, StructArray, +use crate::arrow::array::{ + cast::AsArray, make_array, new_null_array, Array as ArrowArray, GenericListArray, + OffsetSizeTrait, RecordBatch, StringArray, StructArray, }; -use arrow_json::{LineDelimitedWriter, ReaderBuilder}; -use arrow_schema::{ +use crate::arrow::buffer::NullBuffer; +use crate::arrow::compute::concat_batches; +use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, Fields, SchemaRef as ArrowSchemaRef, }; -use arrow_select::concat::concat_batches; +use crate::arrow::json::{LineDelimitedWriter, ReaderBuilder}; +use crate::parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use itertools::Itertools; -use parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use tracing::debug; macro_rules! prim_array_cmp { @@ -40,7 +41,7 @@ macro_rules! prim_array_cmp { .ok_or(Error::invalid_expression( format!("Cannot cast to list array: {}", $right_arr.data_type())) )?; - arrow_ord::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) + crate::arrow::compute::kernels::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) } )+ _ => Err(ArrowError::CastError( @@ -59,7 +60,25 @@ pub(crate) use prim_array_cmp; /// returns a tuples of (mask_indices: Vec, reorder_indices: /// Vec). `mask_indices` is used for generating the mask for reading from the pub(crate) fn make_arrow_error(s: impl Into) -> Error { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s.into())).with_backtrace() + Error::Arrow(crate::arrow::error::ArrowError::InvalidArgumentError( + s.into(), + )) + .with_backtrace() +} + +/// Applies post-processing to data read from parquet files. This includes `reorder_struct_array` to +/// ensure schema compatibility, as well as `fix_nested_null_masks` to ensure that leaf columns have +/// accurate null masks that row visitors rely on for correctness. 
+pub(crate) fn fixup_parquet_read( + batch: RecordBatch, + requested_ordering: &[ReorderIndex], +) -> DeltaResult +where + StructArray: Into, +{ + let data = reorder_struct_array(batch.into(), requested_ordering)?; + let data = fix_nested_null_masks(data); + Ok(data.into()) } /* @@ -500,7 +519,7 @@ pub(crate) fn reorder_struct_array( match &reorder_index.transform { ReorderIndexTransform::Cast(target) => { let col = input_cols[parquet_position].as_ref(); - let col = Arc::new(arrow_cast::cast::cast(col, target)?); + let col = Arc::new(crate::arrow::compute::cast(col, target)?); let new_field = Arc::new( input_fields[parquet_position] .as_ref() @@ -609,6 +628,53 @@ fn reorder_list( } } +/// Use this function to recursively compute properly unioned null masks for all nested +/// columns of a record batch, making it safe to project out and consume nested columns. +/// +/// Arrow does not guarantee that the null masks associated with nested columns are accurate -- +/// instead, the reader must consult the union of logical null masks the column and all +/// ancestors. The parquet reader stopped doing this automatically as of arrow-53.3, for example. +pub fn fix_nested_null_masks(batch: StructArray) -> StructArray { + compute_nested_null_masks(batch, None) +} + +/// Splits a StructArray into its parts, unions in the parent null mask, and uses the result to +/// recursively update the children as well before putting everything back together. +fn compute_nested_null_masks(sa: StructArray, parent_nulls: Option<&NullBuffer>) -> StructArray { + let (fields, columns, nulls) = sa.into_parts(); + let nulls = NullBuffer::union(parent_nulls, nulls.as_ref()); + let columns = columns + .into_iter() + .map(|column| match column.as_struct_opt() { + Some(sa) => Arc::new(compute_nested_null_masks(sa.clone(), nulls.as_ref())) as _, + None => { + let data = column.to_data(); + let nulls = NullBuffer::union(nulls.as_ref(), data.nulls()); + let builder = data.into_builder().nulls(nulls); + // Use an unchecked build to avoid paying a redundant O(k) validation cost for a + // `RecordBatch` with k leaf columns. + // + // SAFETY: The builder was constructed from an `ArrayData` we extracted from the + // column. The change we make is the null buffer, via `NullBuffer::union` with input + // null buffers that were _also_ extracted from the column and its parent. A union + // can only _grow_ the set of NULL rows, so data validity is preserved. Even if the + // `parent_nulls` somehow had a length mismatch --- which it never should, having + // also been extracted from our grandparent --- the mismatch would have already + // caused `NullBuffer::union` to panic. + let data = unsafe { builder.build_unchecked() }; + make_array(data) + } + }) + .collect(); + + // Use an unchecked constructor to avoid paying O(n*k) a redundant null buffer validation cost + // for a `RecordBatch` with n rows and k leaf columns. + // + // SAFETY: We are simply reassembling the input `StructArray` we previously broke apart, with + // updated null buffers. See above for details about null buffer safety. + unsafe { StructArray::new_unchecked(fields, columns, nulls) } +} + /// Arrow lacks the functionality to json-parse a string column into a struct column -- even tho the /// JSON file reader does exactly the same thing. This function is a hack to work around that gap. 
pub(crate) fn parse_json( @@ -679,17 +745,17 @@ pub(crate) fn to_json_bytes( mod tests { use std::sync::Arc; - use arrow::{ - array::AsArray, - buffer::{OffsetBuffer, ScalarBuffer}, - }; - use arrow_array::{ + use crate::arrow::array::{ Array, ArrayRef as ArrowArrayRef, BooleanArray, GenericListArray, Int32Array, StructArray, }; - use arrow_schema::{ + use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; + use crate::arrow::{ + array::AsArray, + buffer::{OffsetBuffer, ScalarBuffer}, + }; use crate::schema::{ArrayType, DataType, MapType, StructField, StructType}; @@ -1432,4 +1498,107 @@ mod tests { ); Ok(()) } + + #[test] + fn test_arrow_broken_nested_null_masks() { + use crate::arrow::datatypes::{DataType, Field, Fields, Schema}; + use crate::engine::arrow_utils::fix_nested_null_masks; + use crate::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + + // Parse some JSON into a nested schema + let schema = Arc::new(Schema::new(vec![Field::new( + "outer", + DataType::Struct(Fields::from(vec![ + Field::new( + "inner_nullable", + DataType::Struct(Fields::from(vec![ + Field::new("leaf_non_null", DataType::Int32, false), + Field::new("leaf_nullable", DataType::Int32, true), + ])), + true, + ), + Field::new( + "inner_non_null", + DataType::Struct(Fields::from(vec![ + Field::new("leaf_non_null", DataType::Int32, false), + Field::new("leaf_nullable", DataType::Int32, true), + ])), + false, + ), + ])), + true, + )])); + let json_string = r#" +{ } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 1 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 2, "leaf_nullable" : 3 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 4 }, "inner_nullable" : { "leaf_non_null" : 5 } } } +{ "outer" : { "inner_non_null" : { "leaf_non_null" : 6 }, "inner_nullable" : { "leaf_non_null" : 7, "leaf_nullable": 8 } } } +"#; + let batch1 = crate::arrow::json::ReaderBuilder::new(schema.clone()) + .build(json_string.as_bytes()) + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("Batch 1: {batch1:?}"); + + macro_rules! assert_nulls { + ( $column: expr, $nulls: expr ) => { + assert_eq!($column.nulls().unwrap(), &NullBuffer::from(&$nulls[..])); + }; + } + + // If any of these tests ever fail, it means the arrow JSON reader started producing + // incomplete nested NULL masks. If that happens, we need to update all JSON reads to call + // `fix_nested_null_masks`. 
+ let outer_1 = batch1.column(0).as_struct(); + assert_nulls!(outer_1, [false, true, true, true, true]); + let inner_nullable_1 = outer_1.column(0).as_struct(); + assert_nulls!(inner_nullable_1, [false, false, false, true, true]); + let nullable_leaf_non_null_1 = inner_nullable_1.column(0); + assert_nulls!(nullable_leaf_non_null_1, [false, false, false, true, true]); + let nullable_leaf_nullable_1 = inner_nullable_1.column(1); + assert_nulls!(nullable_leaf_nullable_1, [false, false, false, false, true]); + let inner_non_null_1 = outer_1.column(1).as_struct(); + assert_nulls!(inner_non_null_1, [false, true, true, true, true]); + let non_null_leaf_non_null_1 = inner_non_null_1.column(0); + assert_nulls!(non_null_leaf_non_null_1, [false, true, true, true, true]); + let non_null_leaf_nullable_1 = inner_non_null_1.column(1); + assert_nulls!(non_null_leaf_nullable_1, [false, false, true, false, false]); + + // Write the batch to a parquet file and read it back + let mut buffer = vec![]; + let mut writer = + crate::parquet::arrow::ArrowWriter::try_new(&mut buffer, schema.clone(), None).unwrap(); + writer.write(&batch1).unwrap(); + writer.close().unwrap(); // writer must be closed to write footer + let batch2 = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer)) + .unwrap() + .build() + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("Batch 2 before: {batch2:?}"); + + // Starting from arrow-53.3, the parquet reader started returning broken nested NULL masks. + let batch2 = RecordBatch::from(fix_nested_null_masks(batch2.into())); + + // Verify the data survived the round trip + let outer_2 = batch2.column(0).as_struct(); + assert_eq!(outer_2, outer_1); + let inner_nullable_2 = outer_2.column(0).as_struct(); + assert_eq!(inner_nullable_2, inner_nullable_1); + let nullable_leaf_non_null_2 = inner_nullable_2.column(0); + assert_eq!(nullable_leaf_non_null_2, nullable_leaf_non_null_1); + let nullable_leaf_nullable_2 = inner_nullable_2.column(1); + assert_eq!(nullable_leaf_nullable_2, nullable_leaf_nullable_1); + let inner_non_null_2 = outer_2.column(1).as_struct(); + assert_eq!(inner_non_null_2, inner_non_null_1); + let non_null_leaf_non_null_2 = inner_non_null_2.column(0); + assert_eq!(non_null_leaf_non_null_2, non_null_leaf_non_null_1); + let non_null_leaf_nullable_2 = inner_non_null_2.column(1); + assert_eq!(non_null_leaf_nullable_2, non_null_leaf_nullable_1); + } } diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index 075716a75..bcdc370a0 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -5,8 +5,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{ready, Context, Poll}; -use arrow_array::RecordBatch; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::array::RecordBatch; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; use futures::future::BoxFuture; use futures::stream::{BoxStream, Stream, StreamExt}; use futures::FutureExt; diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 5606a28d0..fdbc79ebf 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -7,29 +7,27 @@ use object_store::path::Path; use object_store::{DynObjectStore, ObjectStore}; use url::Url; +use super::UrlExt; use crate::engine::default::executor::TaskExecutor; -use crate::{DeltaResult, Error, FileMeta, FileSlice, FileSystemClient}; +use crate::{DeltaResult, Error, FileMeta, 
FileSlice, StorageHandler}; #[derive(Debug)] -pub struct ObjectStoreFileSystemClient { +pub struct ObjectStoreStorageHandler { inner: Arc, has_ordered_listing: bool, - table_root: Path, task_executor: Arc, readahead: usize, } -impl ObjectStoreFileSystemClient { +impl ObjectStoreStorageHandler { pub(crate) fn new( store: Arc, has_ordered_listing: bool, - table_root: Path, task_executor: Arc, ) -> Self { Self { inner: store, has_ordered_listing, - table_root, task_executor, readahead: 10, } @@ -42,21 +40,33 @@ impl ObjectStoreFileSystemClient { } } -impl FileSystemClient for ObjectStoreFileSystemClient { +impl StorageHandler for ObjectStoreStorageHandler { fn list_from( &self, path: &Url, ) -> DeltaResult>>> { - let url = path.clone(); - let offset = Path::from(path.path()); - // TODO properly handle table prefix - let prefix = self.table_root.child("_delta_log"); + // The offset is used for list-after; the prefix is used to restrict the listing to a specific directory. + // Unfortunately, `Path` provides no easy way to check whether a name is directory-like, + // because it strips trailing /, so we're reduced to manually checking the original URL. + let offset = Path::from_url_path(path.path())?; + let prefix = if path.path().ends_with('/') { + offset.clone() + } else { + let mut parts = offset.parts().collect_vec(); + if parts.pop().is_none() { + return Err(Error::Generic(format!( + "Offset path must not be a root directory. Got: '{}'", + path.as_str() + ))); + } + Path::from_iter(parts) + }; let store = self.inner.clone(); // This channel will become the iterator let (sender, receiver) = std::sync::mpsc::sync_channel(4_000); - + let url = path.clone(); self.task_executor.spawn(async move { let mut stream = store.list_with_offset(Some(&prefix), &offset); @@ -122,19 +132,14 @@ impl FileSystemClient for ObjectStoreFileSystemClient { }; let store = store.clone(); async move { - match url.scheme() { - "http" | "https" => { - // have to annotate type here or rustc can't figure it out - Ok::(reqwest::get(url).await?.bytes().await?) - } - _ => { - if let Some(rng) = range { - Ok(store.get_range(&path, rng).await?) - } else { - let result = store.get(&path).await?; - Ok(result.bytes().await?) - } - } + if url.is_presigned() { + // have to annotate type here or rustc can't figure it out + Ok::(reqwest::get(url).await?.bytes().await?) + } else if let Some(rng) = range { + Ok(store.get_range(&path, rng).await?) + } else { + let result = store.get(&path).await?; + Ok(result.bytes().await?) 
} } }) @@ -192,11 +197,9 @@ mod tests { let mut url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); @@ -210,7 +213,7 @@ mod tests { url.set_path(&format!("{}/c", url.path())); slices.push((url, Some(Range { start: 4, end: 9 }))); dbg!("Slices are: {}", &slices); - let data: Vec = client.read_files(slices).unwrap().try_collect().unwrap(); + let data: Vec = storage.read_files(slices).unwrap().try_collect().unwrap(); assert_eq!(data.len(), 3); assert_eq!(data[0], Bytes::from("kernel")); @@ -229,11 +232,10 @@ mod tests { store.put(&name, data.clone().into()).await.unwrap(); let table_root = Url::parse("memory:///").expect("valid url"); - let prefix = Path::from_url_path(table_root.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); let files: Vec<_> = engine - .get_file_system_client() - .list_from(&table_root) + .storage_handler() + .list_from(&table_root.join("_delta_log").unwrap().join("0").unwrap()) .unwrap() .try_collect() .unwrap(); @@ -260,11 +262,11 @@ mod tests { let url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from_url_path(url.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); - let client = engine.get_file_system_client(); - - let files = client.list_from(&Url::parse("file://").unwrap()).unwrap(); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); + let files = engine + .storage_handler() + .list_from(&url.join("_delta_log").unwrap().join("0").unwrap()) + .unwrap(); let mut len = 0; for (file, expected) in files.zip(expected_names.iter()) { assert!( diff --git a/kernel/src/engine/default/json.rs b/kernel/src/engine/default/json.rs index ab296e12a..1dc35539e 100644 --- a/kernel/src/engine/default/json.rs +++ b/kernel/src/engine/default/json.rs @@ -2,19 +2,22 @@ use std::io::BufReader; use std::ops::Range; -use std::sync::Arc; -use std::task::{ready, Poll}; +use std::sync::{mpsc, Arc}; +use std::task::Poll; -use arrow_json::ReaderBuilder; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; +use crate::arrow::record_batch::RecordBatch; use bytes::{Buf, Bytes}; -use futures::{StreamExt, TryStreamExt}; +use futures::stream::{self, BoxStream}; +use futures::{ready, StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{DynObjectStore, GetResultPayload}; +use tracing::warn; use url::Url; use super::executor::TaskExecutor; -use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::arrow_utils::parse_json as arrow_parse_json; use crate::engine::arrow_utils::to_json_bytes; use crate::schema::SchemaRef; @@ -23,15 +26,21 @@ use crate::{ JsonHandler, }; +const DEFAULT_BUFFER_SIZE: usize = 1000; +const DEFAULT_BATCH_SIZE: usize = 1000; + #[derive(Debug)] pub struct DefaultJsonHandler { /// The object store to read files from store: Arc, /// The executor to run async tasks on task_executor: 
Arc, - /// The maximum number of batches to read ahead - readahead: usize, - /// The number of rows to read per batch + /// The maximum number of read requests to buffer in memory at once. Note that this actually + /// controls two things: the number of concurrent requests (done by `buffered`) and the size of + /// the buffer (via our `sync_channel`). + buffer_size: usize, + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. batch_size: usize, } @@ -40,22 +49,34 @@ impl DefaultJsonHandler { Self { store, task_executor, - readahead: 10, - batch_size: 1024, + buffer_size: DEFAULT_BUFFER_SIZE, + batch_size: DEFAULT_BATCH_SIZE, } } - /// Set the maximum number of batches to read ahead during [Self::read_json_files()]. + /// Set the maximum number read requests to buffer in memory at once in + /// [Self::read_json_files()]. + /// + /// Defaults to 1000. /// - /// Defaults to 10. - pub fn with_readahead(mut self, readahead: usize) -> Self { - self.readahead = readahead; + /// Memory constraints can be imposed by constraining the buffer size and batch size. Note that + /// overall memory usage is proportional to the product of these two values. + /// 1. Batch size governs the size of RecordBatches yielded in each iteration of the stream + /// 2. Buffer size governs the number of concurrent tasks (which equals the size of the buffer + pub fn with_buffer_size(mut self, buffer_size: usize) -> Self { + self.buffer_size = buffer_size; self } - /// Set the number of rows to read per batch during [Self::parse_json()]. + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. + /// + /// Defaults to 1000 rows (json objects). + /// + /// See [Decoder::with_buffer_size] for details on constraining memory usage with buffer size + /// and batch size. /// - /// Defaults to 1024. 
+ /// [Decoder::with_buffer_size]: crate::arrow::json::reader::Decoder pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; self @@ -83,13 +104,32 @@ impl JsonHandler for DefaultJsonHandler { let schema: ArrowSchemaRef = Arc::new(physical_schema.as_ref().try_into()?); let file_opener = JsonOpener::new(self.batch_size, schema.clone(), self.store.clone()); - FileStream::new_async_read_iterator( - self.task_executor.clone(), - schema, - Box::new(file_opener), - files, - self.readahead, - ) + + let (tx, rx) = mpsc::sync_channel(self.buffer_size); + let files = files.to_vec(); + let buffer_size = self.buffer_size; + + self.task_executor.spawn(async move { + // an iterator of futures that open each file + let file_futures = files.into_iter().map(|file| file_opener.open(file, None)); + + // create a stream from that iterator which buffers up to `buffer_size` futures at a time + let mut stream = stream::iter(file_futures) + .buffered(buffer_size) + .try_flatten() + .map_ok(|record_batch| -> Box { + Box::new(ArrowEngineData::new(record_batch)) + }); + + // send each record batch over the channel + while let Some(item) = stream.next().await { + if tx.send(item).is_err() { + warn!("read_json receiver end of channel dropped before sending completed"); + } + } + }); + + Ok(Box::new(rx.into_iter())) } // note: for now we just buffer all the data and write it out all at once @@ -102,7 +142,7 @@ impl JsonHandler for DefaultJsonHandler { let buffer = to_json_bytes(data)?; // Put if absent let store = self.store.clone(); // cheap Arc - let path = Path::from(path.path()); + let path = Path::from_url_path(path.path())?; let path_str = path.to_string(); self.task_executor .block_on(async move { @@ -118,7 +158,7 @@ impl JsonHandler for DefaultJsonHandler { } } -/// A [`FileOpener`] that opens a JSON file and yields a [`FileOpenFuture`] +/// Opens JSON files and returns a stream of record batches #[allow(missing_debug_implementations)] pub struct JsonOpener { batch_size: usize, @@ -127,97 +167,309 @@ pub struct JsonOpener { } impl JsonOpener { - /// Returns a [`JsonOpener`] + /// Returns a [`JsonOpener`] pub fn new( batch_size: usize, projected_schema: ArrowSchemaRef, - // file_compression_type: FileCompressionType, object_store: Arc, ) -> Self { Self { batch_size, projected_schema, - // file_compression_type, object_store, } } } -impl FileOpener for JsonOpener { - fn open(&self, file_meta: FileMeta, _: Option>) -> DeltaResult { +impl JsonOpener { + pub async fn open( + &self, + file_meta: FileMeta, + _: Option>, + ) -> DeltaResult>> { let store = self.object_store.clone(); let schema = self.projected_schema.clone(); let batch_size = self.batch_size; - Ok(Box::pin(async move { - let path = Path::from_url_path(file_meta.location.path())?; - match store.get(&path).await?.payload { - GetResultPayload::File(file, _) => { - let reader = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build(BufReader::new(file))?; - Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) - } - GetResultPayload::Stream(s) => { - let mut decoder = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build_decoder()?; - - let mut input = s.map_err(Error::from); - let mut buffered = Bytes::new(); - - let s = futures::stream::poll_fn(move |cx| { - loop { - if buffered.is_empty() { - buffered = match ready!(input.poll_next_unpin(cx)) { - Some(Ok(b)) => b, - Some(Err(e)) => return Poll::Ready(Some(Err(e))), - None => break, - }; - } - let read = buffered.len(); - - let 
decoded = match decoder.decode(buffered.as_ref()) { - Ok(decoded) => decoded, - Err(e) => return Poll::Ready(Some(Err(e.into()))), + let path = Path::from_url_path(file_meta.location.path())?; + match store.get(&path).await?.payload { + GetResultPayload::File(file, _) => { + let reader = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build(BufReader::new(file))?; + Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) + } + GetResultPayload::Stream(s) => { + let mut decoder = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build_decoder()?; + + let mut input = s.map_err(Error::from); + let mut buffered = Bytes::new(); + + let s = futures::stream::poll_fn(move |cx| { + loop { + if buffered.is_empty() { + buffered = match ready!(input.poll_next_unpin(cx)) { + Some(Ok(b)) => b, + Some(Err(e)) => return Poll::Ready(Some(Err(e))), + None => break, }; - - buffered.advance(decoded); - if decoded != read { - break; - } } + let read = buffered.len(); + + // NB (from Decoder::decode docs): + // Read JSON objects from `buf` (param), returning the number of bytes read + // + // This method returns once `batch_size` objects have been parsed since the + // last call to [`Self::flush`], or `buf` is exhausted. Any remaining bytes + // should be included in the next call to [`Self::decode`] + let decoded = match decoder.decode(buffered.as_ref()) { + Ok(decoded) => decoded, + Err(e) => return Poll::Ready(Some(Err(e.into()))), + }; + + buffered.advance(decoded); + if decoded != read { + break; + } + } - Poll::Ready(decoder.flush().map_err(Error::from).transpose()) - }); - Ok(s.map_err(Error::from).boxed()) - } + Poll::Ready(decoder.flush().map_err(Error::from).transpose()) + }); + Ok(s.map_err(Error::from).boxed()) } - })) + } } } #[cfg(test)] mod tests { + use std::collections::{HashMap, HashSet, VecDeque}; use std::path::PathBuf; - - use arrow::array::{AsArray, RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use std::sync::{mpsc, Arc, Mutex}; + use std::task::Waker; + + use crate::actions::get_log_schema; + use crate::arrow::array::{AsArray, Int32Array, RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use crate::engine::arrow_data::ArrowEngineData; + use crate::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, + }; + use crate::utils::test_utils::string_array_to_engine_data; + use futures::future; use itertools::Itertools; - use object_store::{local::LocalFileSystem, ObjectStore}; + use object_store::local::LocalFileSystem; + use object_store::memory::InMemory; + use object_store::{ + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, + }; + + // TODO: should just use the one from test_utils, but running into dependency issues + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } use super::*; - use crate::{ - actions::get_log_schema, engine::arrow_data::ArrowEngineData, - engine::default::executor::tokio::TokioBackgroundExecutor, - }; - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - 
Box::new(ArrowEngineData::new(batch)) + /// Store wrapper that wraps an inner store to guarantee the ordering of GET requests. Note + /// that since the keys are resolved in order, requests to subsequent keys in the order will + /// block until the earlier keys are requested. + /// + /// WARN: Does not handle duplicate keys, and will fail on duplicate requests of the same key. + /// + // TODO(zach): we can handle duplicate requests if we retain the ordering of the keys track + // that all of the keys prior to the one requested have been resolved. + #[derive(Debug)] + struct OrderedGetStore { + // The ObjectStore we are wrapping + inner: T, + // Combined state: queue and wakers, protected by a single mutex + state: Mutex, + } + + #[derive(Debug)] + struct KeysAndWakers { + // Queue of paths in order which they will resolve + ordered_keys: VecDeque, + // Map of paths to wakers for pending get requests + wakers: HashMap, + } + + impl OrderedGetStore { + fn new(inner: T, ordered_keys: &[Path]) -> Self { + let ordered_keys = ordered_keys.to_vec(); + // Check for duplicates + let mut seen = HashSet::new(); + for key in ordered_keys.iter() { + if !seen.insert(key) { + panic!("Duplicate key in OrderedGetStore: {}", key); + } + } + + let state = KeysAndWakers { + ordered_keys: ordered_keys.into(), + wakers: HashMap::new(), + }; + + Self { + inner, + state: Mutex::new(state), + } + } + } + + impl std::fmt::Display for OrderedGetStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let state = self.state.lock().unwrap(); + write!(f, "OrderedGetStore({:?})", state.ordered_keys) + } + } + + #[async_trait::async_trait] + impl ObjectStore for OrderedGetStore { + async fn put(&self, location: &Path, payload: PutPayload) -> Result { + self.inner.put(location, payload).await + } + + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + self.inner.put_opts(location, payload, opts).await + } + + async fn put_multipart(&self, location: &Path) -> Result> { + self.inner.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOpts, + ) -> Result> { + self.inner.put_multipart_opts(location, opts).await + } + + // A GET request is fulfilled by checking if the requested path is next in order: + // - if yes, remove the path from the queue and proceed with the GET request, then wake the + // next path in order + // - if no, register the waker and wait + async fn get(&self, location: &Path) -> Result { + // Do the actual GET request first, then introduce any artificial ordering delays as needed + let result = self.inner.get(location).await; + + // we implement a future which only resolves once the requested path is next in order + future::poll_fn(move |cx| { + let mut state = self.state.lock().unwrap(); + let Some(next_key) = state.ordered_keys.front() else { + panic!("Ran out of keys before {location}"); + }; + if next_key == location { + // We are next in line. Nobody else can remove our key, and our successor + // cannot race with us to register itself because we hold the lock. + // + // first, remove our key from the queue. + // + // note: safe to unwrap because we just checked that the front key exists (and + // is the same as our requested location) + state.ordered_keys.pop_front().unwrap(); + + // there are three possible cases, either: + // 1. the key has already been requested, hence there is a waker waiting, and we + // need to wake it up + // 2. 
the next key has no waker registered, in which case we do nothing, and + // whenever the request for said key is made, it will either be next in line + // or a waker will be registered - either case ensuring that the request is + // completed + // 3. the next key is the last key in the queue, in which case there is nothing + // left to do (no need to wake anyone) + if let Some(next_key) = state.ordered_keys.front().cloned() { + if let Some(waker) = state.wakers.remove(&next_key) { + waker.wake(); // NOTE: Not async, returns instantly. + } + } + Poll::Ready(()) + } else { + // We are not next in line, so wait on our key. Nobody can race to remove it + // because we own it; nobody can race to wake us because we hold the lock. + if state + .wakers + .insert(location.clone(), cx.waker().clone()) + .is_some() + { + panic!("Somebody else is already waiting on {location}"); + } + Poll::Pending + } + }) + .await; + + // When we return this result, the future succeeds instantly. Any pending wake() call + // will not be processed before the next time we yield -- unless our executor is + // multi-threaded and happens to have another thread available. In that case, the + // serialization point is the moment our next-key poll_fn issues the wake call (or + // proves no wake is needed). + result + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.inner.get_opts(location, options).await + } + + async fn get_range(&self, location: &Path, range: Range) -> Result { + self.inner.get_range(location, range).await + } + + async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { + self.inner.get_ranges(location, ranges).await + } + + async fn head(&self, location: &Path) -> Result { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, Result> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy(from, to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename_if_not_exists(from, to).await + } } #[test] @@ -273,7 +525,7 @@ mod tests { "./tests/data/table-with-dv-small/_delta_log/00000000000000000000.json", )) .unwrap(); - let url = url::Url::from_file_path(path).unwrap(); + let url = Url::from_file_path(path).unwrap(); let location = Path::from(url.path()); let meta = store.head(&location).await.unwrap(); @@ -286,21 +538,188 @@ mod tests { let handler = DefaultJsonHandler::new(store, Arc::new(TokioBackgroundExecutor::new())); let physical_schema = Arc::new(ArrowSchema::try_from(get_log_schema().as_ref()).unwrap()); let data: Vec = handler - .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .read_json_files(files, get_log_schema().clone(), None) .unwrap() - .map(|ed_res| { - // TODO(nick) make this easier - ed_res.and_then(|ed| { - ed.into_any() - .downcast::() - 
.map_err(|_| Error::engine_data_type("ArrowEngineData")) - .map(|sd| sd.into()) - }) - }) + .map_ok(into_record_batch) .try_collect() .unwrap(); assert_eq!(data.len(), 1); assert_eq!(data[0].num_rows(), 4); + + // limit batch size + let handler = handler.with_batch_size(2); + let data: Vec = handler + .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + assert_eq!(data.len(), 2); + assert_eq!(data[0].num_rows(), 2); + assert_eq!(data[1].num_rows(), 2); + } + + #[tokio::test] + async fn test_ordered_get_store() { + // note we don't want to go over 1000 since we only buffer 1000 requests at a time + let num_paths = 1000; + let ordered_paths: Vec = (0..num_paths) + .map(|i| Path::from(format!("/test/path{}", i))) + .collect(); + let jumbled_paths: Vec<_> = ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(); + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("content_{}", i)).into()) + .await + .unwrap(); + } + + // Create ordered store with natural order (0, 1, 2, ...) + let ordered_store = Arc::new(OrderedGetStore::new(memory_store, &ordered_paths)); + + let (tx, rx) = mpsc::channel(); + + // Spawn tasks to GET each path in our somewhat jumbled order + // They should complete in order (0, 1, 2, ...) due to OrderedGetStore + let handles = jumbled_paths.into_iter().map(|path| { + let store = ordered_store.clone(); + let tx = tx.clone(); + tokio::spawn(async move { + let _ = store.get(&path).await.unwrap(); + tx.send(path).unwrap(); + }) + }); + + // TODO(zach): we need to join all the handles otherwise none of the tasks run? despite the + // docs? + future::join_all(handles).await; + drop(tx); + + // NB (from mpsc::Receiver::recv): This function will always block the current thread if + // there is no data available and it's possible for more data to be sent (at least one + // sender still exists). + let mut completed = Vec::new(); + while let Ok(path) = rx.recv() { + completed.push(path); + } + + assert_eq!( + completed, + ordered_paths.into_iter().collect_vec(), + "Expected paths to complete in order" + ); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 3)] + async fn test_read_json_files_ordering() { + // this test checks that the read_json_files method returns the files in order in the + // presence of an ObjectStore (OrderedGetStore) that resolves paths in a jumbled order: + // 1. we set up a list of FileMetas (and some random JSON content) in order + // 2. we then set up an ObjectStore to resolves those paths in a jumbled order + // 3. 
then call read_json_files and check that the results are in order + let ordered_paths: Vec = (0..1000) + .map(|i| Path::from(format!("test/path{}", i))) + .collect(); + + let test_list: &[(usize, Vec)] = &[ + // test 1: buffer_size = 1000, just 1000 jumbled paths + ( + 1000, // buffer_size + ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(), + ), + // test 2: buffer_size = 4, jumbled paths in groups of 4 + ( + 4, // buffer_size + (0..250) + .flat_map(|i| { + [ + ordered_paths[1 + 4 * i].clone(), + ordered_paths[4 * i].clone(), + ordered_paths[3 + 4 * i].clone(), + ordered_paths[2 + 4 * i].clone(), + ] + }) + .collect_vec(), + ), + ]; + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("{{\"val\": {i}}}")).into()) + .await + .unwrap(); + } + + for (buffer_size, jumbled_paths) in test_list { + // set up our ObjectStore to resolve paths in a jumbled order + let store = Arc::new(OrderedGetStore::new(memory_store.fork(), jumbled_paths)); + + // convert the paths to FileMeta + let ordered_file_meta: Vec<_> = ordered_paths + .iter() + .map(|path| { + let store = store.clone(); + async move { + let url = Url::parse(&format!("memory:/{}", path)).unwrap(); + let location = Path::from(path.as_ref()); + let meta = store.head(&location).await.unwrap(); + FileMeta { + location: url, + last_modified: meta.last_modified.timestamp_millis(), + size: meta.size, + } + } + }) + .collect(); + + // note: join_all is ordered + let files = future::join_all(ordered_file_meta).await; + + // fire off the read_json_files call (for all the files in order) + let handler = DefaultJsonHandler::new( + store, + Arc::new(TokioMultiThreadExecutor::new( + tokio::runtime::Handle::current(), + )), + ); + let handler = handler.with_buffer_size(*buffer_size); + let schema = Arc::new(ArrowSchema::new(vec![Arc::new(Field::new( + "val", + DataType::Int32, + true, + ))])); + let physical_schema = Arc::new(schema.try_into().unwrap()); + let data: Vec = handler + .read_json_files(&files, physical_schema, None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + // check the order + let all_values: Vec = data + .iter() + .flat_map(|batch| { + let val_col: &Int32Array = batch.column(0).as_primitive(); + (0..val_col.len()).map(|i| val_col.value(i)).collect_vec() + }) + .collect(); + assert_eq!(all_values, (0..1000).collect_vec()); + } } } diff --git a/kernel/src/engine/default/mod.rs b/kernel/src/engine/default/mod.rs index d89cf29cd..49a008136 100644 --- a/kernel/src/engine/default/mod.rs +++ b/kernel/src/engine/default/mod.rs @@ -10,20 +10,19 @@ use std::collections::HashMap; use std::sync::Arc; use self::storage::parse_url_opts; -use object_store::{path::Path, DynObjectStore}; +use object_store::DynObjectStore; use url::Url; use self::executor::TaskExecutor; -use self::filesystem::ObjectStoreFileSystemClient; +use self::filesystem::ObjectStoreStorageHandler; use self::json::DefaultJsonHandler; use self::parquet::DefaultParquetHandler; use super::arrow_data::ArrowEngineData; -use super::arrow_expression::ArrowExpressionHandler; +use super::arrow_expression::ArrowEvaluationHandler; use crate::schema::Schema; use crate::transaction::WriteContext; use crate::{ - DeltaResult, Engine, EngineData, ExpressionHandler, FileSystemClient, JsonHandler, - ParquetHandler, + DeltaResult, Engine, EngineData, EvaluationHandler, JsonHandler, 
ParquetHandler, StorageHandler, }; pub mod executor; @@ -35,11 +34,11 @@ pub mod storage; #[derive(Debug)] pub struct DefaultEngine { - store: Arc, - file_system: Arc>, + object_store: Arc, + storage: Arc>, json: Arc>, parquet: Arc>, - expression: Arc, + expression: Arc, } impl DefaultEngine { @@ -60,18 +59,17 @@ impl DefaultEngine { V: Into, { // table root is the path of the table in the ObjectStore - let (store, table_root) = parse_url_opts(table_root, options)?; - Ok(Self::new(Arc::new(store), table_root, task_executor)) + let (object_store, _table_root) = parse_url_opts(table_root, options)?; + Ok(Self::new(Arc::new(object_store), task_executor)) } /// Create a new [`DefaultEngine`] instance /// /// # Parameters /// - /// - `store`: The object store to use. - /// - `table_root_path`: The root path of the table within storage. + /// - `object_store`: The object store to use. /// - `task_executor`: Used to spawn async IO tasks. See [executor::TaskExecutor]. - pub fn new(store: Arc, table_root: Path, task_executor: Arc) -> Self { + pub fn new(object_store: Arc, task_executor: Arc) -> Self { // HACK to check if we're using a LocalFileSystem from ObjectStore. We need this because // local filesystem doesn't return a sorted list by default. Although the `object_store` // crate explicitly says it _does not_ return a sorted listing, in practice all the cloud @@ -91,27 +89,29 @@ impl DefaultEngine { // in your Cloud Storage buckets, which are ordered in the list lexicographically by name." // So we just need to know if we're local and then if so, we sort the returned file list in // `filesystem.rs` - let store_str = format!("{}", store); + let store_str = format!("{}", object_store); let is_local = store_str.starts_with("LocalFileSystem"); Self { - file_system: Arc::new(ObjectStoreFileSystemClient::new( - store.clone(), + storage: Arc::new(ObjectStoreStorageHandler::new( + object_store.clone(), !is_local, - table_root, task_executor.clone(), )), json: Arc::new(DefaultJsonHandler::new( - store.clone(), + object_store.clone(), task_executor.clone(), )), - parquet: Arc::new(DefaultParquetHandler::new(store.clone(), task_executor)), - store, - expression: Arc::new(ArrowExpressionHandler {}), + parquet: Arc::new(DefaultParquetHandler::new( + object_store.clone(), + task_executor, + )), + object_store, + expression: Arc::new(ArrowEvaluationHandler {}), } } pub fn get_object_store_for_url(&self, _url: &Url) -> Option> { - Some(self.store.clone()) + Some(self.object_store.clone()) } pub async fn write_parquet( @@ -124,7 +124,7 @@ impl DefaultEngine { let transform = write_context.logical_to_physical(); let input_schema: Schema = data.record_batch().schema().try_into()?; let output_schema = write_context.schema(); - let logical_to_physical_expr = self.get_expression_handler().get_evaluator( + let logical_to_physical_expr = self.evaluation_handler().new_expression_evaluator( input_schema.into(), transform.clone(), output_schema.clone().into(), @@ -142,19 +142,90 @@ impl DefaultEngine { } impl Engine for DefaultEngine { - fn get_expression_handler(&self) -> Arc { + fn evaluation_handler(&self) -> Arc { self.expression.clone() } - fn get_file_system_client(&self) -> Arc { - self.file_system.clone() + fn storage_handler(&self) -> Arc { + self.storage.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { self.json.clone() } - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { self.parquet.clone() } } + +trait UrlExt { + // Check if a given url is a 
presigned url and can be used + // to access the object store via simple http requests + fn is_presigned(&self) -> bool; +} + +impl UrlExt for Url { + fn is_presigned(&self) -> bool { + matches!(self.scheme(), "http" | "https") + && ( + // https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html + // https://developers.cloudflare.com/r2/api/s3/presigned-urls/ + self + .query_pairs() + .any(|(k, _)| k.eq_ignore_ascii_case("X-Amz-Signature")) || + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#version-2020-12-06-and-later + // note signed permission (sp) must always be present + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("sp")) || + // https://cloud.google.com/storage/docs/authentication/signatures + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("X-Goog-Credential")) || + // https://www.alibabacloud.com/help/en/oss/user-guide/upload-files-using-presigned-urls + self + .query_pairs().any(|(k, _)| k.eq_ignore_ascii_case("X-OSS-Credential")) + ) + } +} + +#[cfg(test)] +mod tests { + use super::executor::tokio::TokioBackgroundExecutor; + use super::*; + use crate::engine::tests::test_arrow_engine; + use object_store::local::LocalFileSystem; + + #[test] + fn test_default_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = Url::from_directory_path(tmp.path()).unwrap(); + let object_store = Arc::new(LocalFileSystem::new()); + let engine = DefaultEngine::new(object_store, Arc::new(TokioBackgroundExecutor::new())); + test_arrow_engine(&engine, &url); + } + + #[test] + fn test_pre_signed_url() { + let url = Url::parse("https://example.com?X-Amz-Signature=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?sp=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?X-Goog-Credential=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?X-OSS-Credential=foo").unwrap(); + assert!(url.is_presigned()); + + // assert that query keys are case insensitive + let url = Url::parse("https://example.com?x-gooG-credenTIAL=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com?x-oss-CREDENTIAL=foo").unwrap(); + assert!(url.is_presigned()); + + let url = Url::parse("https://example.com").unwrap(); + assert!(!url.is_presigned()); + } +} diff --git a/kernel/src/engine/default/parquet.rs b/kernel/src/engine/default/parquet.rs index a65d329a2..8636b3d9f 100644 --- a/kernel/src/engine/default/parquet.rs +++ b/kernel/src/engine/default/parquet.rs @@ -4,21 +4,22 @@ use std::collections::HashMap; use std::ops::Range; use std::sync::Arc; -use arrow_array::builder::{MapBuilder, MapFieldNames, StringBuilder}; -use arrow_array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::arrow::array::builder::{MapBuilder, MapFieldNames, StringBuilder}; +use crate::arrow::array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::parquet::arrow::arrow_reader::{ + ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +}; +use crate::parquet::arrow::arrow_writer::ArrowWriter; +use crate::parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use futures::StreamExt; use object_store::path::Path; use object_store::DynObjectStore; -use parquet::arrow::arrow_reader::{ - ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, -}; -use parquet::arrow::arrow_writer::ArrowWriter; -use 
parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use uuid::Uuid; use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; +use super::UrlExt; use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::{generate_mask, get_requested_indices, reorder_struct_array}; +use crate::engine::arrow_utils::{fixup_parquet_read, generate_mask, get_requested_indices}; use crate::engine::default::executor::TaskExecutor; use crate::engine::parquet_row_group_skipping::ParquetRowGroupSkipping; use crate::schema::SchemaRef; @@ -191,18 +192,19 @@ impl ParquetHandler for DefaultParquetHandler { // -> reqwest to get data // -> parse to parquet // SAFETY: we did is_empty check above, this is ok. - let file_opener: Box = match files[0].location.scheme() { - "http" | "https" => Box::new(PresignedUrlOpener::new( + let file_opener: Box = if files[0].location.is_presigned() { + Box::new(PresignedUrlOpener::new( 1024, physical_schema.clone(), predicate, - )), - _ => Box::new(ParquetOpener::new( + )) + } else { + Box::new(ParquetOpener::new( 1024, physical_schema.clone(), predicate, self.store.clone(), - )), + )) }; FileStream::new_async_read_iterator( self.task_executor.clone(), @@ -281,12 +283,7 @@ impl FileOpener for ParquetOpener { let stream = builder.with_batch_size(batch_size).build()?; - let stream = stream.map(move |rbr| { - // re-order each batch if needed - rbr.map_err(Error::Parquet).and_then(|rb| { - reorder_struct_array(rb.into(), &requested_ordering).map(Into::into) - }) - }); + let stream = stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering)); Ok(stream.boxed()) })) } @@ -355,12 +352,7 @@ impl FileOpener for PresignedUrlOpener { let reader = builder.with_batch_size(batch_size).build()?; let stream = futures::stream::iter(reader); - let stream = stream.map(move |rbr| { - // re-order each batch if needed - rbr.map_err(Error::Arrow).and_then(|rb| { - reorder_struct_array(rb.into(), &requested_ordering).map(Into::into) - }) - }); + let stream = stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering)); Ok(stream.boxed()) })) } @@ -371,8 +363,7 @@ mod tests { use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; - use arrow_array::array::Array; - use arrow_array::RecordBatch; + use crate::arrow::array::{Array, RecordBatch}; use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; use url::Url; @@ -519,7 +510,7 @@ mod tests { .try_into() .unwrap(); - let filename = location.path().split('/').last().unwrap(); + let filename = location.path().split('/').next_back().unwrap(); assert_eq!(&expected_location.join(filename).unwrap(), location); assert_eq!(expected_size, size); assert!(now - last_modified < 10_000); diff --git a/kernel/src/engine/ensure_data_types.rs b/kernel/src/engine/ensure_data_types.rs index b6f186671..da699be07 100644 --- a/kernel/src/engine/ensure_data_types.rs +++ b/kernel/src/engine/ensure_data_types.rs @@ -5,7 +5,7 @@ use std::{ ops::Deref, }; -use arrow_schema::{DataType as ArrowDataType, Field as ArrowField}; +use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; use itertools::Itertools; use crate::{ @@ -256,7 +256,7 @@ fn metadata_eq( #[cfg(test)] mod tests { - use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, Fields}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField, Fields}; use crate::{ engine::ensure_data_types::ensure_data_types, @@ -276,8 +276,8 @@ mod tests { 
assert!(can_upcast_to_decimal(&Decimal128(5, 1), 6u8, 2i8)); assert!(can_upcast_to_decimal( &Decimal128(10, 5), - arrow_schema::DECIMAL128_MAX_PRECISION, - arrow_schema::DECIMAL128_MAX_SCALE - 5 + crate::arrow::datatypes::DECIMAL128_MAX_PRECISION, + crate::arrow::datatypes::DECIMAL128_MAX_SCALE - 5 )); assert!(can_upcast_to_decimal(&Int8, 3u8, 0i8)); diff --git a/kernel/src/engine/mod.rs b/kernel/src/engine/mod.rs index 8ea07384a..c58b882f7 100644 --- a/kernel/src/engine/mod.rs +++ b/kernel/src/engine/mod.rs @@ -27,3 +27,80 @@ pub(crate) mod arrow_get_data; pub(crate) mod ensure_data_types; #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] pub mod parquet_row_group_skipping; + +#[cfg(test)] +mod tests { + use itertools::Itertools; + use object_store::path::Path; + use std::sync::Arc; + use url::Url; + + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use crate::engine::arrow_data::ArrowEngineData; + use crate::{Engine, EngineData}; + + use test_utils::delta_path_for_version; + + fn test_list_from_should_sort_and_filter( + engine: &dyn Engine, + base_url: &Url, + engine_data: impl Fn() -> Box, + ) { + let json = engine.json_handler(); + let get_data = || Box::new(std::iter::once(Ok(engine_data()))); + + let expected_names: Vec = (1..4) + .map(|i| delta_path_for_version(i, "json")) + .collect_vec(); + + for i in expected_names.iter().rev() { + let path = base_url.join(i.as_ref()).unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + } + let path = base_url.join("other").unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + + let storage = engine.storage_handler(); + + // list files after an offset + let test_url = base_url.join(expected_names[0].as_ref()).unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len() - 1); + for (file, expected) in files.iter().zip(expected_names.iter().skip(1)) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + + let test_url = base_url + .join(delta_path_for_version(0, "json").as_ref()) + .unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + + // list files inside a directory / key prefix + let test_url = base_url.join("_delta_log/").unwrap(); + let files: Vec<_> = storage.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + for (file, expected) in files.iter().zip(expected_names.iter()) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + } + + fn get_arrow_data() -> Box { + let schema = Arc::new(ArrowSchema::new(vec![Field::new( + "dog", + ArrowDataType::Utf8, + true, + )])); + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from(vec!["remi", "wilson"]))], + ) + .unwrap(); + Box::new(ArrowEngineData::new(data)) + } + + pub(crate) fn test_arrow_engine(engine: &dyn Engine, base_url: &Url) { + test_list_from_should_sort_and_filter(engine, base_url, get_arrow_data); + } +} diff --git a/kernel/src/engine/parquet_row_group_skipping.rs b/kernel/src/engine/parquet_row_group_skipping.rs index 79c87d923..2464ca455 100644 --- a/kernel/src/engine/parquet_row_group_skipping.rs +++ b/kernel/src/engine/parquet_row_group_skipping.rs @@ -1,15 +1,13 @@ //! 
An implementation of parquet row group skipping using data skipping predicates over footer stats. -use crate::expressions::{ - BinaryExpression, ColumnName, Expression, Scalar, UnaryExpression, VariadicExpression, -}; -use crate::predicates::parquet_stats_skipping::ParquetStatsProvider; +use crate::expressions::{ColumnName, Expression, Scalar}; +use crate::kernel_predicates::parquet_stats_skipping::ParquetStatsProvider; +use crate::parquet::arrow::arrow_reader::ArrowReaderBuilder; +use crate::parquet::file::metadata::RowGroupMetaData; +use crate::parquet::file::statistics::Statistics; +use crate::parquet::schema::types::ColumnDescPtr; use crate::schema::{DataType, PrimitiveType}; use chrono::{DateTime, Days}; -use parquet::arrow::arrow_reader::ArrowReaderBuilder; -use parquet::file::metadata::RowGroupMetaData; -use parquet::file::statistics::Statistics; -use parquet::schema::types::ColumnDescPtr; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use tracing::debug; #[cfg(test)] @@ -57,7 +55,7 @@ impl<'a> RowGroupFilter<'a> { /// Applies a filtering predicate to a row group. Return value false means to skip it. fn apply(row_group: &'a RowGroupMetaData, predicate: &Expression) -> bool { - use crate::predicates::PredicateEvaluator as _; + use crate::kernel_predicates::KernelPredicateEvaluator as _; RowGroupFilter::new(row_group, predicate).eval_sql_where(predicate) != Some(false) } @@ -225,35 +223,19 @@ pub(crate) fn compute_field_indices( fields: &[ColumnDescPtr], expression: &Expression, ) -> HashMap { - fn do_recurse(expression: &Expression, cols: &mut HashSet) { - use Expression::*; - let mut recurse = |expr| do_recurse(expr, cols); // simplifies the call sites below - match expression { - Literal(_) => {} - Column(name) => cols.extend([name.clone()]), // returns `()`, unlike `insert` - Struct(fields) => fields.iter().for_each(recurse), - Unary(UnaryExpression { expr, .. }) => recurse(expr), - Binary(BinaryExpression { left, right, .. }) => { - [left, right].iter().for_each(|e| recurse(e)) - } - Variadic(VariadicExpression { exprs, .. }) => exprs.iter().for_each(recurse), - } - } - // Build up a set of requested column paths, then take each found path as the corresponding map // key (avoids unnecessary cloning). // // NOTE: If a requested column was not available, it is silently ignored. These missing columns // are implied all-null, so we will infer their min/max stats as NULL and nullcount == rowcount. - let mut requested_columns = HashSet::new(); - do_recurse(expression, &mut requested_columns); + let mut requested_columns = expression.references(); fields .iter() .enumerate() .filter_map(|(i, f)| { requested_columns .take(f.path().parts()) - .map(|path| (path, i)) + .map(|path| (path.clone(), i)) }) .collect() } diff --git a/kernel/src/engine/parquet_row_group_skipping/tests.rs b/kernel/src/engine/parquet_row_group_skipping/tests.rs index 37a3bb1b0..1ad2208db 100644 --- a/kernel/src/engine/parquet_row_group_skipping/tests.rs +++ b/kernel/src/engine/parquet_row_group_skipping/tests.rs @@ -1,8 +1,8 @@ use super::*; use crate::expressions::{column_expr, column_name}; -use crate::predicates::DataSkippingPredicateEvaluator as _; +use crate::kernel_predicates::DataSkippingPredicateEvaluator as _; +use crate::parquet::arrow::arrow_reader::ArrowReaderMetadata; use crate::Expression; -use parquet::arrow::arrow_reader::ArrowReaderMetadata; use std::fs::File; /// Performs an exhaustive set of reads against a specially crafted parquet file. 
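As context for the `compute_field_indices` hunk above: the rewrite drops the hand-rolled recursive walk and instead asks the expression for its column references, then maps each parquet leaf column whose path was actually requested to its index in the footer schema, silently skipping requested columns the file does not contain. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `ColPath`, `compute_field_indices_sketch`, and the flat `parquet_leaf_paths` input are illustrative stand-ins, not the kernel's actual `ColumnName`/`ColumnDescPtr` API.

```rust
use std::collections::{HashMap, HashSet};

// Stand-in for a column path (the kernel uses `ColumnName`); purely illustrative.
type ColPath = Vec<String>;

// Simplified model of "collect referenced columns, then map each parquet leaf
// column whose path was requested to its index in the footer schema".
fn compute_field_indices_sketch(
    parquet_leaf_paths: &[ColPath],  // one entry per leaf column, in footer order
    referenced: &HashSet<ColPath>,   // what `expression.references()` would yield
) -> HashMap<ColPath, usize> {
    parquet_leaf_paths
        .iter()
        .enumerate()
        // A requested column missing from the file is silently skipped here; the
        // caller treats it as all-null (min/max = NULL, nullcount = rowcount).
        .filter(|(_, path)| referenced.contains(*path))
        .map(|(i, path)| (path.clone(), i))
        .collect()
}

fn main() {
    let leaves = vec![
        vec!["a".to_string()],
        vec!["nested".to_string(), "b".to_string()],
    ];
    let mut wanted = HashSet::new();
    wanted.insert(vec!["nested".to_string(), "b".to_string()]);
    wanted.insert(vec!["missing".to_string()]); // not in the file: silently ignored
    let index = compute_field_indices_sketch(&leaves, &wanted);
    assert_eq!(index.len(), 1);
    assert_eq!(index[&vec!["nested".to_string(), "b".to_string()]], 1);
}
```

The sketch only demonstrates the lookup shape; the real code works against `ColumnDescPtr` paths and reuses the found path as the map key to avoid unnecessary cloning.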
diff --git a/kernel/src/engine/sync/json.rs b/kernel/src/engine/sync/json.rs index 3d33b1025..f2212cb81 100644 --- a/kernel/src/engine/sync/json.rs +++ b/kernel/src/engine/sync/json.rs @@ -1,6 +1,7 @@ use std::{fs::File, io::BufReader, io::Write}; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; use tempfile::NamedTempFile; use url::Url; @@ -22,7 +23,7 @@ fn try_create_from_json( arrow_schema: ArrowSchemaRef, _predicate: Option, ) -> DeltaResult>> { - let json = arrow_json::ReaderBuilder::new(arrow_schema) + let json = ReaderBuilder::new(arrow_schema) .build(BufReader::new(file))? .map(|data| Ok(ArrowEngineData::new(data?))); Ok(json) @@ -65,6 +66,10 @@ impl JsonHandler for SyncJsonHandler { ))); }; + if !parent.exists() { + std::fs::create_dir_all(parent)?; + } + // write data to tmp file let mut tmp_file = NamedTempFile::new_in(parent)?; let buf = to_json_bytes(data)?; @@ -92,10 +97,8 @@ mod tests { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::DataType as ArrowDataType; - use arrow_schema::Field; - use arrow_schema::Schema as ArrowSchema; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use serde_json::json; use url::Url; diff --git a/kernel/src/engine/sync/mod.rs b/kernel/src/engine/sync/mod.rs index f637ec105..0c119396e 100644 --- a/kernel/src/engine/sync/mod.rs +++ b/kernel/src/engine/sync/mod.rs @@ -1,58 +1,58 @@ //! A simple, single threaded, [`Engine`] that can only read from the local filesystem -use super::arrow_expression::ArrowExpressionHandler; +use super::arrow_expression::ArrowEvaluationHandler; use crate::engine::arrow_data::ArrowEngineData; use crate::{ - DeltaResult, Engine, Error, ExpressionHandler, ExpressionRef, FileDataReadResultIterator, - FileMeta, FileSystemClient, JsonHandler, ParquetHandler, SchemaRef, + DeltaResult, Engine, Error, EvaluationHandler, ExpressionRef, FileDataReadResultIterator, + FileMeta, JsonHandler, ParquetHandler, SchemaRef, StorageHandler, }; -use arrow_schema::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; +use crate::arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use itertools::Itertools; use std::fs::File; use std::sync::Arc; use tracing::debug; -mod fs_client; pub(crate) mod json; mod parquet; +mod storage; /// This is a simple implementation of [`Engine`]. It only supports reading data from the local /// filesystem, and internally represents data using `Arrow`. 
pub struct SyncEngine { - fs_client: Arc, + storage_handler: Arc, json_handler: Arc, parquet_handler: Arc, - expression_handler: Arc, + evaluation_handler: Arc, } impl SyncEngine { #[allow(clippy::new_without_default)] pub fn new() -> Self { SyncEngine { - fs_client: Arc::new(fs_client::SyncFilesystemClient {}), + storage_handler: Arc::new(storage::SyncStorageHandler {}), json_handler: Arc::new(json::SyncJsonHandler {}), parquet_handler: Arc::new(parquet::SyncParquetHandler {}), - expression_handler: Arc::new(ArrowExpressionHandler {}), + evaluation_handler: Arc::new(ArrowEvaluationHandler {}), } } } impl Engine for SyncEngine { - fn get_expression_handler(&self) -> Arc { - self.expression_handler.clone() + fn evaluation_handler(&self) -> Arc { + self.evaluation_handler.clone() } - fn get_file_system_client(&self) -> Arc { - self.fs_client.clone() + fn storage_handler(&self) -> Arc { + self.storage_handler.clone() } /// Get the connector provided [`ParquetHandler`]. - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { self.parquet_handler.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { self.json_handler.clone() } } @@ -97,3 +97,17 @@ where .map(|data| Ok(Box::new(ArrowEngineData::new(data??.into())) as _)); Ok(Box::new(result)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::tests::test_arrow_engine; + + #[test] + fn test_sync_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = url::Url::from_directory_path(tmp.path()).unwrap(); + let engine = SyncEngine::new(); + test_arrow_engine(&engine, &url); + } +} diff --git a/kernel/src/engine/sync/parquet.rs b/kernel/src/engine/sync/parquet.rs index 260ef321b..48010af30 100644 --- a/kernel/src/engine/sync/parquet.rs +++ b/kernel/src/engine/sync/parquet.rs @@ -1,11 +1,11 @@ use std::fs::File; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; use super::read_files; use crate::engine::arrow_data::ArrowEngineData; -use crate::engine::arrow_utils::{generate_mask, get_requested_indices, reorder_struct_array}; +use crate::engine::arrow_utils::{fixup_parquet_read, generate_mask, get_requested_indices}; use crate::engine::parquet_row_group_skipping::ParquetRowGroupSkipping; use crate::schema::SchemaRef; use crate::{DeltaResult, ExpressionRef, FileDataReadResultIterator, FileMeta, ParquetHandler}; @@ -28,10 +28,8 @@ fn try_create_from_parquet( if let Some(predicate) = predicate { builder = builder.with_row_group_filter(predicate.as_ref()); } - Ok(builder.build()?.map(move |data| { - let reordered = reorder_struct_array(data?.into(), &requested_ordering)?; - Ok(ArrowEngineData::new(reordered.into())) - })) + let stream = builder.build()?; + Ok(stream.map(move |rbr| fixup_parquet_read(rbr?, &requested_ordering))) } impl ParquetHandler for SyncParquetHandler { diff --git a/kernel/src/engine/sync/fs_client.rs b/kernel/src/engine/sync/storage.rs similarity index 88% rename from kernel/src/engine/sync/fs_client.rs rename to kernel/src/engine/sync/storage.rs index 9577b1499..a2bd8f536 100644 --- a/kernel/src/engine/sync/fs_client.rs +++ b/kernel/src/engine/sync/storage.rs @@ -2,11 +2,11 @@ use bytes::Bytes; use itertools::Itertools; use url::Url; -use crate::{DeltaResult, Error, FileMeta, FileSlice, FileSystemClient}; +use 
crate::{DeltaResult, Error, FileMeta, FileSlice, StorageHandler}; -pub(crate) struct SyncFilesystemClient; +pub(crate) struct SyncStorageHandler; -impl FileSystemClient for SyncFilesystemClient { +impl StorageHandler for SyncStorageHandler { /// List the paths in the same directory that are lexicographically greater or equal to /// (UTF-8 sorting) the given `path`. The result is sorted by the file name. fn list_from( @@ -39,7 +39,7 @@ impl FileSystemClient for SyncFilesystemClient { let all_ents: Vec<_> = std::fs::read_dir(path_to_read)? .filter(|ent_res| { match (ent_res, min_file_name) { - (Ok(ent), Some(min_file_name)) => ent.file_name() >= *min_file_name, + (Ok(ent), Some(min_file_name)) => ent.file_name() > *min_file_name, _ => true, // Keep unfiltered and/or error entries } }) @@ -86,8 +86,8 @@ mod tests { use test_utils::abs_diff; - use super::SyncFilesystemClient; - use crate::FileSystemClient; + use super::SyncStorageHandler; + use crate::StorageHandler; /// generate json filenames that follow the spec (numbered padded to 20 chars) fn get_json_filename(index: usize) -> String { @@ -96,7 +96,7 @@ mod tests { #[test] fn test_file_meta_is_correct() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let begin_time = SystemTime::now().duration_since(UNIX_EPOCH)?; @@ -106,9 +106,9 @@ mod tests { writeln!(f, "null")?; f.flush()?; - let url_path = tmp_dir.path().join(get_json_filename(1)); + let url_path = tmp_dir.path().join(get_json_filename(0)); let url = Url::from_file_path(url_path).unwrap(); - let files: Vec<_> = client.list_from(&url)?.try_collect()?; + let files: Vec<_> = storage.list_from(&url)?.try_collect()?; assert!(!files.is_empty()); for meta in files.iter() { @@ -120,7 +120,7 @@ mod tests { #[test] fn test_list_from() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let mut expected = vec![]; for i in 0..3 { @@ -131,27 +131,27 @@ mod tests { } let url_path = tmp_dir.path().join(get_json_filename(1)); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; let mut file_count = 0; for (i, file) in list.enumerate() { // i+1 in index because we started at 0001 in the listing assert_eq!( file?.location.to_file_path().unwrap().to_str().unwrap(), - expected[i + 1].to_str().unwrap() + expected[i + 2].to_str().unwrap() ); file_count += 1; } - assert_eq!(file_count, 2); + assert_eq!(file_count, 1); let url_path = tmp_dir.path().join(""); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; file_count = list.count(); assert_eq!(file_count, 3); let url_path = tmp_dir.path().join(format!("{:020}", 1)); let url = Url::from_file_path(url_path).unwrap(); - let list = client.list_from(&url)?; + let list = storage.list_from(&url)?; file_count = list.count(); assert_eq!(file_count, 2); Ok(()) @@ -159,14 +159,14 @@ mod tests { #[test] fn test_read_files() -> Result<(), Box> { - let client = SyncFilesystemClient; + let storage = SyncStorageHandler; let tmp_dir = tempfile::tempdir().unwrap(); let path = tmp_dir.path().join(get_json_filename(1)); let mut f = File::create(path.clone())?; writeln!(f, "null")?; let url = Url::from_file_path(path).unwrap(); let file_slice = (url.clone(), None); - let read = client.read_files(vec![file_slice])?; + let read = 
storage.read_files(vec![file_slice])?; let mut file_count = 0; let mut buf = BytesMut::with_capacity(16); buf.put(&b"null\n"[..]); diff --git a/kernel/src/engine_data.rs b/kernel/src/engine_data.rs index 333ced827..54cce0e26 100644 --- a/kernel/src/engine_data.rs +++ b/kernel/src/engine_data.rs @@ -7,6 +7,19 @@ use tracing::debug; use std::collections::HashMap; +/// Engine data paired with a selection vector indicating which rows are logically selected. +/// +/// A value of `true` in the selection vector means the corresponding row is selected (i.e., not deleted), +/// while `false` means the row is logically deleted and should be ignored. +/// +/// Interpreting unselected (`false`) rows will result in incorrect/undefined behavior. +pub struct FilteredEngineData { + // The underlying engine data + pub data: Box, + // The selection vector where `true` marks rows to include in results + pub selection_vector: Vec, +} + /// a trait that an engine exposes to give access to a list pub trait EngineList { /// Return the length of the list at the specified row_index in the raw data diff --git a/kernel/src/error.rs b/kernel/src/error.rs index 815ef3e51..80857b856 100644 --- a/kernel/src/error.rs +++ b/kernel/src/error.rs @@ -10,6 +10,9 @@ use crate::schema::{DataType, StructType}; use crate::table_properties::ParseIntervalError; use crate::Version; +#[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] +use crate::arrow::error::ArrowError; + /// A [`std::result::Result`] that has the kernel [`Error`] as the error variant pub type DeltaResult = std::result::Result; @@ -29,7 +32,7 @@ pub enum Error { /// An error performing operations on arrow data #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error(transparent)] - Arrow(arrow_schema::ArrowError), + Arrow(ArrowError), /// User tried to convert engine data to the wrong type #[error("Invalid engine data type. Could not convert to {0}")] @@ -58,10 +61,10 @@ pub enum Error { #[error("Internal error {0}. This is a kernel bug, please report.")] InternalError(String), - /// An error encountered while working with parquet data - #[cfg(feature = "parquet")] + /// An error enountered while working with parquet data + #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error("Arrow error: {0}")] - Parquet(#[from] parquet::errors::ParquetError), + Parquet(#[from] crate::parquet::errors::ParquetError), /// An error interacting with the object_store crate // We don't use [#from] object_store::Error here as our From impl transforms @@ -195,6 +198,12 @@ pub enum Error { /// Invalid checkpoint files #[error("Invalid Checkpoint: {0}")] InvalidCheckpoint(String), + + /// Error while transforming a schema + leaves into an Expression of literals + #[error(transparent)] + LiteralExpressionTransformError( + #[from] crate::expressions::literal_expression_transform::Error, + ), } // Convenience constructors for Error types that take a String argument @@ -304,8 +313,8 @@ from_with_backtrace!( ); #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] -impl From for Error { - fn from(value: arrow_schema::ArrowError) -> Self { +impl From for Error { + fn from(value: ArrowError) -> Self { Self::Arrow(value).with_backtrace() } } diff --git a/kernel/src/expressions/literal_expression_transform.rs b/kernel/src/expressions/literal_expression_transform.rs new file mode 100644 index 000000000..2d2276c11 --- /dev/null +++ b/kernel/src/expressions/literal_expression_transform.rs @@ -0,0 +1,517 @@ +//! 
The [`LiteralExpressionTransform`] is a [`SchemaTransform`] that transforms a [`Schema`] and an +//! ordered list of leaf values (scalars) into an [`Expression`] with a literal value for each leaf. + +use std::borrow::Cow; +use std::mem; + +use tracing::debug; + +use crate::expressions::{Expression, Scalar}; +use crate::schema::{ + ArrayType, DataType, MapType, PrimitiveType, SchemaTransform, StructField, StructType, +}; + +/// [`SchemaTransform`] that will transform a [`Schema`] and an ordered list of leaf values +/// (Scalars) into an Expression with a [`Literal`] expr for each leaf. +#[derive(Debug)] +pub(crate) struct LiteralExpressionTransform<'a, T: Iterator> { + /// Leaf values to insert in schema order. + scalars: T, + /// A stack of built Expressions. After visiting children, we pop them off to + /// build the parent container, then push the parent back on. + stack: Vec, + /// Since schema transforms are infallible we keep track of errors here + error: Result<(), Error>, +} + +/// Any error for [`LiteralExpressionTransform`] +#[derive(thiserror::Error, Debug)] +pub enum Error { + /// Schema mismatch error + #[error("Schema error: {0}")] + Schema(String), + + /// Insufficient number of scalars (too many) to create a single-row expression + #[error("Excess scalar: {0} given for literal expression transform")] + ExcessScalars(Scalar), + + /// Insufficient number of scalars (too few) to create a single-row expression + #[error("Too few scalars given for literal expression transform")] + InsufficientScalars, + + /// Empty expression stack after performing the transform + #[error("No Expression was created after performing the transform")] + EmptyStack, + + /// Unsupported operation + #[error("Unsupported operation: {0}")] + Unsupported(String), +} + +impl<'a, I: Iterator> LiteralExpressionTransform<'a, I> { + pub(crate) fn new(scalars: impl IntoIterator) -> Self { + Self { + scalars: scalars.into_iter(), + stack: Vec::new(), + error: Ok(()), + } + } + + /// return the Expression we just built (or propagate Error). the top of `stack` should be our + /// final Expression + pub(crate) fn try_into_expr(mut self) -> Result { + self.error?; + + if let Some(s) = self.scalars.next() { + return Err(Error::ExcessScalars(s.clone())); + } + + self.stack.pop().ok_or(Error::EmptyStack) + } + + fn set_error(&mut self, error: Error) { + if let Err(e) = mem::replace(&mut self.error, Err(error)) { + debug!("Overwriting error that was already set: {e}"); + } + } +} + +impl<'a, T: Iterator> SchemaTransform<'a> for LiteralExpressionTransform<'a, T> { + fn transform_primitive( + &mut self, + prim_type: &'a PrimitiveType, + ) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + let Some(scalar) = self.scalars.next() else { + self.set_error(Error::InsufficientScalars); + return None; + }; + + let DataType::Primitive(scalar_type) = scalar.data_type() else { + self.set_error(Error::Schema( + "Non-primitive scalar type {datatype} provided".to_string(), + )); + return None; + }; + if scalar_type != *prim_type { + self.set_error(Error::Schema(format!( + "Mismatched scalar type while creating Expression: expected {}, got {}", + prim_type, scalar_type + ))); + return None; + } + + self.stack.push(Expression::Literal(scalar.clone())); + None + } + + fn transform_struct(&mut self, struct_type: &'a StructType) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + // Only consume newly-added entries (if any). 
There could be fewer than expected if + // the recursion encountered an error. + let mark = self.stack.len(); + self.recurse_into_struct(struct_type)?; + let field_exprs = self.stack.split_off(mark); + + if field_exprs.len() != struct_type.fields_len() { + self.set_error(Error::InsufficientScalars); + return None; + } + + let mut found_non_nullable_null = false; + let mut all_null = true; + let fields = struct_type.fields(); + for (field, expr) in fields.zip(&field_exprs) { + if !matches!(expr, Expression::Literal(Scalar::Null(_))) { + all_null = false; + } else if !field.is_nullable() { + found_non_nullable_null = true; + } + } + + // If all children are NULL and at least one is ostensibly non-nullable, we interpret + // the struct itself as being NULL (if all aren't null then it's an error) + let struct_expr = if found_non_nullable_null { + if !all_null { + // we found a non_nullable NULL, but other siblings are non-null: error + self.set_error(Error::Schema( + "NULL value for non-nullable struct field with non-NULL siblings".to_string(), + )); + return None; + } + Expression::null_literal(struct_type.clone().into()) + } else { + Expression::struct_from(field_exprs) + }; + + self.stack.push(struct_expr); + None + } + + fn transform_struct_field(&mut self, field: &'a StructField) -> Option> { + // first always check error to terminate early if possible + self.error.as_ref().ok()?; + + self.recurse_into_struct_field(field); + Some(Cow::Borrowed(field)) + } + + // arrays unsupported for now + fn transform_array(&mut self, _array_type: &'a ArrayType) -> Option> { + self.error.as_ref().ok()?; + self.set_error(Error::Unsupported( + "ArrayType not yet supported in literal expression transform".to_string(), + )); + None + } + + // maps unsupported for now + fn transform_map(&mut self, _map_type: &'a MapType) -> Option> { + self.error.as_ref().ok()?; + self.set_error(Error::Unsupported( + "MapType not yet supported in literal expression transform".to_string(), + )); + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::sync::Arc; + + use crate::schema::SchemaRef; + use crate::schema::StructType; + use crate::DataType as DeltaDataTypes; + + use paste::paste; + + // helper to take values/schema to pass to `create_one` and assert the result = expected + fn assert_single_row_transform( + values: &[Scalar], + schema: SchemaRef, + expected: Result, + ) { + let mut schema_transform = LiteralExpressionTransform::new(values); + let datatype = schema.into(); + let _transformed = schema_transform.transform(&datatype); + match expected { + Ok(expected_expr) => { + let actual_expr = schema_transform.try_into_expr().unwrap(); + // TODO: we can't compare NULLs so we convert with .to_string to workaround + // see: https://github.com/delta-io/delta-kernel-rs/pull/677 + assert_eq!(expected_expr.to_string(), actual_expr.to_string()); + } + Err(()) => { + assert!(schema_transform.try_into_expr().is_err()); + } + } + } + + #[test] + fn test_create_one_top_level_null() { + let values = &[Scalar::Null(DeltaDataTypes::INTEGER)]; + + let schema = Arc::new(StructType::new([StructField::not_null( + "col_1", + DeltaDataTypes::INTEGER, + )])); + let expected = Expression::null_literal(schema.clone().into()); + assert_single_row_transform(values, schema, Ok(expected)); + + let schema = Arc::new(StructType::new([StructField::nullable( + "col_1", + DeltaDataTypes::INTEGER, + )])); + let expected = + Expression::struct_from(vec![Expression::null_literal(DeltaDataTypes::INTEGER)]); + 
assert_single_row_transform(values, schema, Ok(expected)); + } + + #[test] + fn test_create_one_missing_values() { + let values = &[1.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable("col_1", DeltaDataTypes::INTEGER), + StructField::nullable("col_2", DeltaDataTypes::INTEGER), + ])); + assert_single_row_transform(values, schema, Err(())); + } + + #[test] + fn test_create_one_extra_values() { + let values = &[1.into(), 2.into(), 3.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable("col_1", DeltaDataTypes::INTEGER), + StructField::nullable("col_2", DeltaDataTypes::INTEGER), + ])); + assert_single_row_transform(values, schema, Err(())); + } + + #[test] + fn test_create_one_incorrect_schema() { + let values = &["a".into()]; + let schema = Arc::new(StructType::new([StructField::nullable( + "col_1", + DeltaDataTypes::INTEGER, + )])); + assert_single_row_transform(values, schema, Err(())); + } + + // useful test to make sure that we correctly process the stack + #[test] + fn test_many_structs() { + let values: &[Scalar] = &[1.into(), 2.into(), 3.into(), 4.into()]; + let schema = Arc::new(StructType::new([ + StructField::nullable( + "x", + DeltaDataTypes::struct_type([ + StructField::not_null("a", DeltaDataTypes::INTEGER), + StructField::nullable("b", DeltaDataTypes::INTEGER), + ]), + ), + StructField::nullable( + "y", + DeltaDataTypes::struct_type([ + StructField::not_null("c", DeltaDataTypes::INTEGER), + StructField::nullable("d", DeltaDataTypes::INTEGER), + ]), + ), + ])); + let expected = Expression::struct_from(vec![ + Expression::struct_from(vec![Expression::literal(1), Expression::literal(2)]), + Expression::struct_from(vec![Expression::literal(3), Expression::literal(4)]), + ]); + assert_single_row_transform(values, schema, Ok(expected)); + } + + #[derive(Clone, Copy)] + struct TestSchema { + x_nullable: bool, + a_nullable: bool, + b_nullable: bool, + } + + enum Expected { + Noop, + NullStruct, + Null, + Error, // TODO: we could check the actual error + } + + fn run_test(test_schema: TestSchema, values: (Option, Option), expected: Expected) { + let (a_val, b_val) = values; + let a = match a_val { + Some(v) => Scalar::Integer(v), + None => Scalar::Null(DeltaDataTypes::INTEGER), + }; + let b = match b_val { + Some(v) => Scalar::Integer(v), + None => Scalar::Null(DeltaDataTypes::INTEGER), + }; + let values: &[Scalar] = &[a, b]; + + let field_a = StructField::new("a", DeltaDataTypes::INTEGER, test_schema.a_nullable); + let field_b = StructField::new("b", DeltaDataTypes::INTEGER, test_schema.b_nullable); + let field_x = StructField::new( + "x", + StructType::new([field_a.clone(), field_b.clone()]), + test_schema.x_nullable, + ); + let schema = Arc::new(StructType::new([field_x.clone()])); + + let expected_result = match expected { + Expected::Noop => { + let nested_struct = Expression::struct_from(vec![ + Expression::literal(values[0].clone()), + Expression::literal(values[1].clone()), + ]); + Ok(Expression::struct_from([nested_struct])) + } + Expected::Null => Ok(Expression::null_literal(schema.clone().into())), + Expected::NullStruct => { + let nested_null = Expression::null_literal(field_x.data_type().clone()); + Ok(Expression::struct_from([nested_null])) + } + Expected::Error => Err(()), + }; + + assert_single_row_transform(values, schema, expected_result); + } + + // helper to convert nullable/not_null to bool + macro_rules! 
bool_from_nullable { + (nullable) => { + true + }; + (not_null) => { + false + }; + } + + // helper to convert a/b/N to Some/Some/None (1 and 2 just arbitrary non-null ints) + macro_rules! parse_value { + (a) => { + Some(1) + }; + (b) => { + Some(2) + }; + (N) => { + None + }; + } + + macro_rules! test_nullability_combinations { + ( + name = $name:ident, + schema = { x: $x:ident, a: $a:ident, b: $b:ident }, + tests = { + ($ta1:tt, $tb1:tt) -> $expected1:ident, + ($ta2:tt, $tb2:tt) -> $expected2:ident, + ($ta3:tt, $tb3:tt) -> $expected3:ident, + ($ta4:tt, $tb4:tt) -> $expected4:ident $(,)? + } + ) => { + paste! { + #[test] + fn [<$name _ $ta1:lower _ $tb1:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta1), parse_value!($tb1)), Expected::$expected1); + } + #[test] + fn [<$name _ $ta2:lower _ $tb2:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta2), parse_value!($tb2)), Expected::$expected2); + } + #[test] + fn [<$name _ $ta3:lower _ $tb3:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta3), parse_value!($tb3)), Expected::$expected3); + } + #[test] + fn [<$name _ $ta4:lower _ $tb4:lower>]() { + let schema = TestSchema { + x_nullable: bool_from_nullable!($x), + a_nullable: bool_from_nullable!($a), + b_nullable: bool_from_nullable!($b), + }; + run_test(schema, (parse_value!($ta4), parse_value!($tb4)), Expected::$expected4); + } + } + } + } + + // Group 1: nullable { nullable, nullable } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> x (a, N) + // 4. (N, N) -> x (N, N) + test_nullability_combinations! { + name = test_all_nullable, + schema = { x: nullable, a: nullable, b: nullable }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Noop, + (N, N) -> Noop, + } + } + + // Group 2: nullable { nullable, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> Err + // 4. (N, N) -> x NULL + test_nullability_combinations! { + name = test_nullable_nullable_not_null, + schema = { x: nullable, a: nullable, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Error, + (N, N) -> NullStruct, + } + } + + // Group 3: nullable { not_null, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> Err + // 3. (a, N) -> Err + // 4. (N, N) -> x NULL + test_nullability_combinations! { + name = test_nullable_not_null_not_null, + schema = { x: nullable, a: not_null, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Error, + (a, N) -> Error, + (N, N) -> NullStruct, + } + } + + // Group 4: not_null { nullable, nullable } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> x (a, N) + // 4. (N, N) -> x (N, N) + test_nullability_combinations! { + name = test_not_null_nullable_nullable, + schema = { x: not_null, a: nullable, b: nullable }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Noop, + (N, N) -> Noop, + } + } + + // Group 5: not_null { nullable, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> x (N, b) + // 3. (a, N) -> Err + // 4. (N, N) -> NULL + test_nullability_combinations! 
{ + name = test_not_null_nullable_not_null, + schema = { x: not_null, a: nullable, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Noop, + (a, N) -> Error, + (N, N) -> Null, + } + } + + // Group 6: not_null { not_null, not_null } + // 1. (a, b) -> x (a, b) + // 2. (N, b) -> Err + // 3. (a, N) -> Err + // 4. (N, N) -> NULL + test_nullability_combinations! { + name = test_all_not_null, + schema = { x: not_null, a: not_null, b: not_null }, + tests = { + (a, b) -> Noop, + (N, b) -> Error, + (a, N) -> Error, + (N, N) -> Null, + } + } +} diff --git a/kernel/src/expressions/mod.rs b/kernel/src/expressions/mod.rs index 9f4972408..b3de5c4c3 100644 --- a/kernel/src/expressions/mod.rs +++ b/kernel/src/expressions/mod.rs @@ -15,6 +15,8 @@ use crate::DataType; mod column_names; mod scalars; +pub(crate) mod literal_expression_transform; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// A binary operator. pub enum BinaryOperator { @@ -235,15 +237,9 @@ impl Display for Expression { impl Expression { /// Returns a set of columns referenced by this expression. pub fn references(&self) -> HashSet<&ColumnName> { - let mut set = HashSet::new(); - - for expr in self.walk() { - if let Self::Column(name) = expr { - set.insert(name); - } - } - - set + let mut references = GetColumnReferences::default(); + let _ = references.transform(self); + references.into_inner() } /// Create a new column name expression from input satisfying `FromIterator for ColumnName`. @@ -369,26 +365,6 @@ impl Expression { pub fn distinct(self, other: impl Into) -> Self { Self::binary(BinaryOperator::Distinct, self, other) } - - fn walk(&self) -> impl Iterator + '_ { - use Expression::*; - let mut stack = vec![self]; - std::iter::from_fn(move || { - let expr = stack.pop()?; - match expr { - Literal(_) => {} - Column { .. } => {} - Struct(exprs) => stack.extend(exprs), - Unary(UnaryExpression { expr, .. }) => stack.push(expr), - Binary(BinaryExpression { left, right, .. }) => { - stack.push(left); - stack.push(right); - } - Variadic(VariadicExpression { exprs, .. }) => stack.extend(exprs), - } - Some(expr) - }) - } } /// Generic framework for recursive bottom-up expression transforms. Transformations return @@ -604,6 +580,25 @@ impl> std::ops::Div for Expression { } } +/// Retrieves the set of column names referenced by an expression. +#[derive(Default)] +pub(crate) struct GetColumnReferences<'a> { + references: HashSet<&'a ColumnName>, +} + +impl<'a> GetColumnReferences<'a> { + pub(crate) fn into_inner(self) -> HashSet<&'a ColumnName> { + self.references + } +} + +impl<'a> ExpressionTransform<'a> for GetColumnReferences<'a> { + fn transform_column(&mut self, name: &'a ColumnName) -> Option> { + self.references.insert(name); + Some(Cow::Borrowed(name)) + } +} + /// An expression "transform" that doesn't actually change the expression at all. Instead, it /// measures the maximum depth of a expression, with a depth limit to prevent stack overflow. Useful /// for verifying that a expression has reasonable depth before attempting to work with it. 
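The `references()` rewrite delegates the tree walk to the transform framework via `GetColumnReferences`. Below is a minimal sketch of the resulting caller-side behavior, using only constructors that appear elsewhere in this patch (`column_expr!`, `Expression::literal`, `.distinct(..)`, `Expression::struct_from`); the exact import paths are assumptions.

```rust
// Sketch of a unit test inside the kernel crate (paths as used in this patch).
use std::collections::HashSet;

use crate::expressions::{column_expr, ColumnName, Expression};

#[test]
fn references_are_collected_by_transform() {
    // An expression that mentions columns `x` and `y`.
    let expr = Expression::struct_from(vec![
        column_expr!("x").distinct(Expression::literal(10)),
        column_expr!("y"),
    ]);

    // `references()` now runs GetColumnReferences::transform over the tree and
    // returns the distinct set of referenced column names.
    let refs: HashSet<&ColumnName> = expr.references();
    assert_eq!(refs.len(), 2);
}
```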
diff --git a/kernel/src/predicates/mod.rs b/kernel/src/kernel_predicates/mod.rs similarity index 81% rename from kernel/src/predicates/mod.rs rename to kernel/src/kernel_predicates/mod.rs index e47da293f..8ded4381a 100644 --- a/kernel/src/predicates/mod.rs +++ b/kernel/src/kernel_predicates/mod.rs @@ -55,7 +55,7 @@ mod tests; /// NOTE: The error-handling semantics of this trait's scalar-based predicate evaluation may differ /// from those of the engine's expression evaluation, because kernel expressions don't include the /// necessary type information to reliably detect all type errors. -pub(crate) trait PredicateEvaluator { +pub(crate) trait KernelPredicateEvaluator { type Output; /// A (possibly inverted) scalar NULL test, e.g. ` IS [NOT] NULL`. @@ -67,29 +67,11 @@ pub(crate) trait PredicateEvaluator { /// A (possibly inverted) NULL check, e.g. ` IS [NOT] NULL`. fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option; - /// A less-than comparison, e.g. ` < `. - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( < )` becomes ` <= `. - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option; - - /// A less-than-or-equal comparison, e.g. ` <= ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( <= )` becomes ` < `. - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option; + /// A (possibly inverted) less-than comparison, e.g. ` < `. + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option; - /// A greater-than comparison, e.g. ` > ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( > )` becomes ` >= `. - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option; - - /// A greater-than-or-equal comparison, e.g. ` >= ` - /// - /// NOTE: Caller is responsible to commute and/or invert the operation if needed, - /// e.g. `NOT( >= )` becomes ` > `. - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option; + /// A (possibly inverted) less-than-or-equal comparison, e.g. ` <= ` + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option; /// A (possibly inverted) equality comparison, e.g. ` = ` or ` != `. 
/// @@ -210,17 +192,17 @@ pub(crate) trait PredicateEvaluator { return None; } }; - match (op, inverted) { - (Plus | Minus | Multiply | Divide, _) => None, // Unsupported - not boolean output - (LessThan, false) | (GreaterThanOrEqual, true) => self.eval_lt(col, val), - (LessThanOrEqual, false) | (GreaterThan, true) => self.eval_le(col, val), - (GreaterThan, false) | (LessThanOrEqual, true) => self.eval_gt(col, val), - (GreaterThanOrEqual, false) | (LessThan, true) => self.eval_ge(col, val), - (Equal, _) => self.eval_eq(col, val, inverted), - (NotEqual, _) => self.eval_eq(col, val, !inverted), - (Distinct, _) => self.eval_distinct(col, val, inverted), - (In, _) => self.eval_in(col, val, inverted), - (NotIn, _) => self.eval_in(col, val, !inverted), + match op { + Plus | Minus | Multiply | Divide => None, // Unsupported - not boolean output + LessThan => self.eval_lt(col, val, inverted), + GreaterThanOrEqual => self.eval_lt(col, val, !inverted), + LessThanOrEqual => self.eval_le(col, val, inverted), + GreaterThan => self.eval_le(col, val, !inverted), + Equal => self.eval_eq(col, val, inverted), + NotEqual => self.eval_eq(col, val, !inverted), + Distinct => self.eval_distinct(col, val, inverted), + In => self.eval_in(col, val, inverted), + NotIn => self.eval_in(col, val, !inverted), } } @@ -426,16 +408,16 @@ pub(crate) trait PredicateEvaluator { } } -/// A collection of provided methods from the [`PredicateEvaluator`] trait, factored out to allow +/// A collection of provided methods from the [`KernelPredicateEvaluator`] trait, factored out to allow /// reuse by multiple bool-output predicate evaluator implementations. -pub(crate) struct PredicateEvaluatorDefaults; -impl PredicateEvaluatorDefaults { - /// Directly null-tests a scalar. See [`PredicateEvaluator::eval_scalar_is_null`]. +pub(crate) struct KernelPredicateEvaluatorDefaults; +impl KernelPredicateEvaluatorDefaults { + /// Directly null-tests a scalar. See [`KernelPredicateEvaluator::eval_scalar_is_null`]. pub(crate) fn eval_scalar_is_null(val: &Scalar, inverted: bool) -> Option { Some(val.is_null() != inverted) } - /// Directly evaluates a boolean scalar. See [`PredicateEvaluator::eval_scalar`]. + /// Directly evaluates a boolean scalar. See [`KernelPredicateEvaluator::eval_scalar`]. pub(crate) fn eval_scalar(val: &Scalar, inverted: bool) -> Option { match val { Scalar::Boolean(val) => Some(*val != inverted), @@ -456,7 +438,7 @@ impl PredicateEvaluatorDefaults { Some(matched != inverted) } - /// Directly evaluates a boolean comparison. See [`PredicateEvaluator::eval_binary_scalars`]. + /// Directly evaluates a boolean comparison. See [`KernelPredicateEvaluator::eval_binary_scalars`]. pub(crate) fn eval_binary_scalars( op: BinaryOperator, left: &Scalar, @@ -479,7 +461,7 @@ impl PredicateEvaluatorDefaults { } /// Finishes evaluating a (possibly inverted) variadic operation. See - /// [`PredicateEvaluator::finish_eval_variadic`]. + /// [`KernelPredicateEvaluator::finish_eval_variadic`]. /// /// The inputs were already inverted by the caller, if needed. /// @@ -511,7 +493,7 @@ impl PredicateEvaluatorDefaults { } } -/// Resolves columns as scalars, as a building block for [`DefaultPredicateEvaluator`]. +/// Resolves columns as scalars, as a building block for [`DefaultKernelPredicateEvaluator`]. pub(crate) trait ResolveColumnAsScalar { fn resolve_column(&self, col: &ColumnName) -> Option; } @@ -534,8 +516,6 @@ impl ResolveColumnAsScalar for EmptyColumnResolver { } } -// In testing, it is convenient to just build a hashmap of scalar values. 
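The rewritten dispatch folds `>` and `>=` into (possibly inverted) `eval_le` and `eval_lt` calls. The identities it relies on are easy to sanity-check in plain Rust; this is not kernel API, just the boolean algebra behind the `inverted` flag.

```rust
fn main() {
    for col in [1, 5, 10] {
        for val in [1, 5, 10] {
            // GreaterThanOrEqual is an inverted LessThan: NOT(col < val) == (col >= val)
            assert_eq!(col >= val, !(col < val));
            // GreaterThan is an inverted LessThanOrEqual: NOT(col <= val) == (col > val)
            assert_eq!(col > val, !(col <= val));
        }
    }
}
```

NULL or incomparable operands never reach these identities: the evaluators return `None` in that case, and `None` stays `None` regardless of `inverted`, which is why the flag is threaded down rather than negating results after the fact.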
-#[cfg(test)] impl ResolveColumnAsScalar for std::collections::HashMap { fn resolve_column(&self, col: &ColumnName) -> Option { self.get(col).cloned() @@ -544,17 +524,17 @@ impl ResolveColumnAsScalar for std::collections::HashMap { /// A predicate evaluator that directly evaluates the predicate to produce an `Option` /// result. Column resolution is handled by an embedded [`ResolveColumnAsScalar`] instance. -pub(crate) struct DefaultPredicateEvaluator { +pub(crate) struct DefaultKernelPredicateEvaluator { resolver: R, } -impl DefaultPredicateEvaluator { +impl DefaultKernelPredicateEvaluator { // Convenient thin wrapper fn resolve_column(&self, col: &ColumnName) -> Option { self.resolver.resolve_column(col) } } -impl From for DefaultPredicateEvaluator { +impl From for DefaultKernelPredicateEvaluator { fn from(resolver: R) -> Self { Self { resolver } } @@ -563,15 +543,15 @@ impl From for DefaultPredicateEvaluator PredicateEvaluator for DefaultPredicateEvaluator { +impl KernelPredicateEvaluator for DefaultKernelPredicateEvaluator { type Output = bool; fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -579,24 +559,14 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< self.eval_scalar_is_null(&col, inverted) } - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::LessThan, &col, val, false) - } - - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::LessThanOrEqual, &col, val, false) - } - - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::GreaterThan, &col, val, false) + self.eval_binary_scalars(BinaryOperator::LessThan, &col, val, inverted) } - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let col = self.resolve_column(col)?; - self.eval_binary_scalars(BinaryOperator::GreaterThanOrEqual, &col, val, false) + self.eval_binary_scalars(BinaryOperator::LessThanOrEqual, &col, val, inverted) } fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { @@ -611,7 +581,7 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) } fn eval_binary_columns( @@ -632,7 +602,7 @@ impl PredicateEvaluator for DefaultPredicateEvaluator< exprs: impl IntoIterator>, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) + KernelPredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) } } @@ -660,10 +630,10 @@ pub(crate) trait DataSkippingPredicateEvaluator { /// Retrieves the row count of a column (parquet footers always include this stat). 
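With the `#[cfg(test)]` gate removed, the `HashMap` resolver can back a `DefaultKernelPredicateEvaluator` outside of tests as well. A rough usage sketch of the internal API follows; the `.lt(..)` comparison builder is assumed to exist alongside the `.distinct(..)` builder shown earlier in this patch.

```rust
use std::collections::HashMap;

use crate::expressions::{column_expr, column_name, Expression, Scalar};
use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, KernelPredicateEvaluator as _};

fn sketch() {
    // Resolve column `x` to the scalar 7.
    let resolver = HashMap::from([(column_name!("x"), Scalar::from(7))]);
    let filter = DefaultKernelPredicateEvaluator::from(resolver);

    // `x < 10` resolves x through the HashMap and evaluates to true.
    let pred = column_expr!("x").lt(Expression::literal(10));
    assert_eq!(filter.eval_sql_where(&pred), Some(true));
}
```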
fn get_rowcount_stat(&self) -> Option; - /// See [`PredicateEvaluator::eval_scalar_is_null`] + /// See [`KernelPredicateEvaluator::eval_scalar_is_null`] fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option; - /// See [`PredicateEvaluator::eval_scalar`] + /// See [`KernelPredicateEvaluator::eval_scalar`] fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option; /// For IS NULL (IS NOT NULL), we can only skip the file if all-null (no-null). Any other @@ -675,7 +645,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { /// however, so the worst that can happen is we fail to skip an unnecessary file. fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option; - /// See [`PredicateEvaluator::eval_binary_scalars`] + /// See [`KernelPredicateEvaluator::eval_binary_scalars`] fn eval_binary_scalars( &self, op: BinaryOperator, @@ -684,7 +654,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { inverted: bool, ) -> Option; - /// See [`PredicateEvaluator::finish_eval_variadic`] + /// See [`KernelPredicateEvaluator::finish_eval_variadic`] fn finish_eval_variadic( &self, op: VariadicOperator, @@ -703,7 +673,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { ) -> Option; /// Performs a partial comparison against a column min-stat. See - /// [`PredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. + /// [`KernelPredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. fn partial_cmp_min_stat( &self, col: &ColumnName, @@ -716,7 +686,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { } /// Performs a partial comparison against a column max-stat. See - /// [`PredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. + /// [`KernelPredicateEvaluatorDefaults::partial_cmp_scalars`] for details of the comparison semantics. 
fn partial_cmp_max_stat( &self, col: &ColumnName, @@ -728,53 +698,51 @@ pub(crate) trait DataSkippingPredicateEvaluator { self.eval_partial_cmp(ord, max, val, inverted) } - /// See [`PredicateEvaluator::eval_lt`] - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col < val`: - // Skip if `val` is not greater than _all_ values in [min, max], implies - // Skip if `val <= min AND val <= max` implies - // Skip if `val <= min` implies - // Keep if `NOT(val <= min)` implies - // Keep if `val > min` implies - // Keep if `min < val` - self.partial_cmp_min_stat(col, val, Ordering::Less, false) - } - - /// See [`PredicateEvaluator::eval_le`] - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col <= val`: - // Skip if `val` is less than _all_ values in [min, max], implies - // Skip if `val < min AND val < max` implies - // Skip if `val < min` implies - // Keep if `NOT(val < min)` implies - // Keep if `NOT(min > val)` - self.partial_cmp_min_stat(col, val, Ordering::Greater, true) - } - - /// See [`PredicateEvaluator::eval_gt`] - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col > val`: - // Skip if `val` is not less than _all_ values in [min, max], implies - // Skip if `val >= min AND val >= max` implies - // Skip if `val >= max` implies - // Keep if `NOT(val >= max)` implies - // Keep if `NOT(max <= val)` implies - // Keep if `max > val` - self.partial_cmp_max_stat(col, val, Ordering::Greater, false) - } - - /// See [`PredicateEvaluator::eval_ge`] - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { - // Given `col >= val`: - // Skip if `val is greater than _every_ value in [min, max], implies - // Skip if `val > min AND val > max` implies - // Skip if `val > max` implies - // Keep if `NOT(val > max)` implies - // Keep if `NOT(max < val)` - self.partial_cmp_max_stat(col, val, Ordering::Less, true) - } - - /// See [`PredicateEvaluator::eval_ge`] + /// See [`KernelPredicateEvaluator::eval_lt`] + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + if inverted { + // Given `col >= val`: + // Skip if `val is greater than _every_ value in [min, max], implies + // Skip if `val > min AND val > max` implies + // Skip if `val > max` implies + // Keep if `NOT(val > max)` implies + // Keep if `NOT(max < val)` + self.partial_cmp_max_stat(col, val, Ordering::Less, true) + } else { + // Given `col < val`: + // Skip if `val` is not greater than _all_ values in [min, max], implies + // Skip if `val <= min AND val <= max` implies + // Skip if `val <= min` implies + // Keep if `NOT(val <= min)` implies + // Keep if `val > min` implies + // Keep if `min < val` + self.partial_cmp_min_stat(col, val, Ordering::Less, false) + } + } + + /// See [`KernelPredicateEvaluator::eval_le`] + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + if inverted { + // Given `col > val`: + // Skip if `val` is not less than _all_ values in [min, max], implies + // Skip if `val >= min AND val >= max` implies + // Skip if `val >= max` implies + // Keep if `NOT(val >= max)` implies + // Keep if `NOT(max <= val)` implies + // Keep if `max > val` + self.partial_cmp_max_stat(col, val, Ordering::Greater, false) + } else { + // Given `col <= val`: + // Skip if `val` is less than _all_ values in [min, max], implies + // Skip if `val < min AND val < max` implies + // Skip if `val < min` implies + // Keep if `NOT(val < min)` implies + // Keep if `NOT(min > val)` + self.partial_cmp_min_stat(col, val, 
Ordering::Greater, true) + } + } + + /// See [`KernelPredicateEvaluator::eval_ge`] fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { let (op, exprs) = if inverted { // Column could compare not-equal if min or max value differs from the literal. @@ -795,7 +763,7 @@ pub(crate) trait DataSkippingPredicateEvaluator { } } -impl PredicateEvaluator for T { +impl KernelPredicateEvaluator for T { type Output = T::Output; fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { @@ -810,20 +778,12 @@ impl PredicateEvaluator for T { self.eval_is_null(col, inverted) } - fn eval_lt(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_lt(col, val) - } - - fn eval_le(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_le(col, val) - } - - fn eval_gt(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_gt(col, val) + fn eval_lt(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + self.eval_lt(col, val, inverted) } - fn eval_ge(&self, col: &ColumnName, val: &Scalar) -> Option { - self.eval_ge(col, val) + fn eval_le(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { + self.eval_le(col, val, inverted) } fn eval_eq(&self, col: &ColumnName, val: &Scalar, inverted: bool) -> Option { diff --git a/kernel/src/predicates/parquet_stats_skipping.rs b/kernel/src/kernel_predicates/parquet_stats_skipping.rs similarity index 86% rename from kernel/src/predicates/parquet_stats_skipping.rs rename to kernel/src/kernel_predicates/parquet_stats_skipping.rs index ff7536f40..492277d5a 100644 --- a/kernel/src/predicates/parquet_stats_skipping.rs +++ b/kernel/src/kernel_predicates/parquet_stats_skipping.rs @@ -1,6 +1,6 @@ //! An implementation of data skipping that leverages parquet stats from the file footer. 
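The min/max derivations in `eval_lt`/`eval_le` reduce to simple interval checks against the file's column statistics. A plain-Rust restatement over integer stats (not kernel API) makes the skip/keep outcomes easy to verify.

```rust
use std::cmp::Ordering;

/// `col < val`: keep the file iff `min < val`
/// (mirrors partial_cmp_min_stat(col, val, Ordering::Less, false)).
fn keep_for_lt(min: i64, _max: i64, val: i64) -> bool {
    min.cmp(&val) == Ordering::Less
}

/// Inverted case, `col >= val`: keep the file iff `NOT(max < val)`
/// (mirrors partial_cmp_max_stat(col, val, Ordering::Less, true)).
fn keep_for_ge(_min: i64, max: i64, val: i64) -> bool {
    max.cmp(&val) != Ordering::Less
}

fn main() {
    // File stats: min = 10, max = 20.
    assert!(keep_for_lt(10, 20, 15)); // some rows may satisfy col < 15
    assert!(!keep_for_lt(10, 20, 10)); // no row can satisfy col < 10: skip
    assert!(keep_for_ge(10, 20, 20)); // a row equal to max satisfies col >= 20
    assert!(!keep_for_ge(10, 20, 21)); // every row is below 21: skip
}
```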
use crate::expressions::{BinaryOperator, ColumnName, Scalar, VariadicOperator}; -use crate::predicates::{DataSkippingPredicateEvaluator, PredicateEvaluatorDefaults}; +use crate::kernel_predicates::{DataSkippingPredicateEvaluator, KernelPredicateEvaluatorDefaults}; use crate::schema::DataType; use std::cmp::Ordering; @@ -57,15 +57,15 @@ impl DataSkippingPredicateEvaluator for T { val: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::partial_cmp_scalars(ord, &col, val, inverted) + KernelPredicateEvaluatorDefaults::partial_cmp_scalars(ord, &col, val, inverted) } fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -83,7 +83,7 @@ impl DataSkippingPredicateEvaluator for T { right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) } fn finish_eval_variadic( @@ -92,6 +92,6 @@ impl DataSkippingPredicateEvaluator for T { exprs: impl IntoIterator>, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) + KernelPredicateEvaluatorDefaults::finish_eval_variadic(op, exprs, inverted) } } diff --git a/kernel/src/predicates/parquet_stats_skipping/tests.rs b/kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs similarity index 99% rename from kernel/src/predicates/parquet_stats_skipping/tests.rs rename to kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs index 3eeb49758..8b7e1ffa7 100644 --- a/kernel/src/predicates/parquet_stats_skipping/tests.rs +++ b/kernel/src/kernel_predicates/parquet_stats_skipping/tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::expressions::{column_expr, Expression as Expr}; -use crate::predicates::PredicateEvaluator as _; +use crate::kernel_predicates::KernelPredicateEvaluator as _; use crate::DataType; const TRUE: Option = Some(true); diff --git a/kernel/src/predicates/tests.rs b/kernel/src/kernel_predicates/tests.rs similarity index 95% rename from kernel/src/predicates/tests.rs rename to kernel/src/kernel_predicates/tests.rs index fdeda8305..5c498b020 100644 --- a/kernel/src/predicates/tests.rs +++ b/kernel/src/kernel_predicates/tests.rs @@ -43,7 +43,7 @@ fn test_default_eval_scalar() { ]; for (value, inverted, expect) in test_cases.into_iter() { assert_eq!( - PredicateEvaluatorDefaults::eval_scalar(&value, inverted), + KernelPredicateEvaluatorDefaults::eval_scalar(&value, inverted), expect, "value: {value:?} inverted: {inverted}" ); @@ -100,7 +100,7 @@ fn test_default_partial_cmp_scalars() { ]; // scalars of different types are always incomparable - let compare = PredicateEvaluatorDefaults::partial_cmp_scalars; + let compare = KernelPredicateEvaluatorDefaults::partial_cmp_scalars; for (i, a) in smaller_values.iter().enumerate() { for b in smaller_values.iter().skip(i + 1) { for op in [Less, Equal, Greater] { @@ -193,7 +193,7 @@ fn test_eval_binary_scalars() { let smaller_value = Scalar::Long(1); let larger_value = Scalar::Long(10); for inverted in [true, false] { - let compare = PredicateEvaluatorDefaults::eval_binary_scalars; + let compare 
= KernelPredicateEvaluatorDefaults::eval_binary_scalars; expect_eq!( compare(Equal, &smaller_value, &smaller_value, inverted), Some(!inverted), @@ -269,7 +269,7 @@ fn test_eval_binary_columns() { (column_name!("x"), Scalar::from(1)), (column_name!("y"), Scalar::from(10)), ]); - let filter = DefaultPredicateEvaluator::from(columns); + let filter = DefaultKernelPredicateEvaluator::from(columns); let x = column_expr!("x"); let y = column_expr!("y"); for inverted in [true, false] { @@ -307,7 +307,7 @@ fn test_eval_variadic() { (&[Some(false), Some(true), None], Some(false), Some(true)), (&[Some(true), Some(false), None], Some(false), Some(true)), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (inputs, expect_and, expect_or) in test_cases.iter() { let inputs: Vec<_> = inputs .iter() @@ -343,7 +343,7 @@ fn test_eval_column() { ]; let col = &column_name!("x"); for (input, expect) in &test_cases { - let filter = DefaultPredicateEvaluator::from(input.clone()); + let filter = DefaultKernelPredicateEvaluator::from(input.clone()); for inverted in [true, false] { expect_eq!( filter.eval_column(col, inverted), @@ -362,7 +362,7 @@ fn test_eval_not() { (Scalar::Null(DataType::BOOLEAN), None), (Scalar::Long(1), None), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (input, expect) in test_cases { let input = input.into(); for inverted in [true, false] { @@ -378,7 +378,7 @@ fn test_eval_not() { #[test] fn test_eval_is_null() { let expr = column_expr!("x"); - let filter = DefaultPredicateEvaluator::from(Scalar::from(1)); + let filter = DefaultKernelPredicateEvaluator::from(Scalar::from(1)); expect_eq!( filter.eval_unary(UnaryOperator::IsNull, &expr, true), Some(true), @@ -408,7 +408,7 @@ fn test_eval_distinct() { let one = &Scalar::from(1); let two = &Scalar::from(2); let null = &Scalar::Null(DataType::INTEGER); - let filter = DefaultPredicateEvaluator::from(one.clone()); + let filter = DefaultKernelPredicateEvaluator::from(one.clone()); let col = &column_name!("x"); expect_eq!( filter.eval_distinct(col, one, true), @@ -441,7 +441,7 @@ fn test_eval_distinct() { "DISTINCT(x, NULL) (x = 1)" ); - let filter = DefaultPredicateEvaluator::from(null.clone()); + let filter = DefaultKernelPredicateEvaluator::from(null.clone()); expect_eq!( filter.eval_distinct(col, one, true), Some(false), @@ -470,7 +470,7 @@ fn test_eval_distinct() { fn eval_binary() { let col = column_expr!("x"); let val = Expression::literal(10); - let filter = DefaultPredicateEvaluator::from(Scalar::from(1)); + let filter = DefaultKernelPredicateEvaluator::from(Scalar::from(1)); // unsupported expect_eq!( @@ -585,8 +585,8 @@ fn test_sql_where() { const NULL: Expr = Expr::Literal(Scalar::Null(DataType::BOOLEAN)); const FALSE: Expr = Expr::Literal(Scalar::Boolean(false)); const TRUE: Expr = Expr::Literal(Scalar::Boolean(true)); - let null_filter = DefaultPredicateEvaluator::from(NullColumnResolver); - let empty_filter = DefaultPredicateEvaluator::from(EmptyColumnResolver); + let null_filter = DefaultKernelPredicateEvaluator::from(NullColumnResolver); + let empty_filter = DefaultKernelPredicateEvaluator::from(EmptyColumnResolver); // Basic sanity check expect_eq!(null_filter.eval_sql_where(&VAL), None, "WHERE {VAL}"); diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 8dde21afe..d6797ba51 100644 --- 
a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -35,15 +35,15 @@ //! //! ## Expression handling //! -//! Expression handling is done via the [`ExpressionHandler`], which in turn allows the creation of +//! Expression handling is done via the [`EvaluationHandler`], which in turn allows the creation of //! [`ExpressionEvaluator`]s. These evaluators are created for a specific predicate [`Expression`] //! and allow evaluation of that predicate for a specific batches of data. //! //! ## File system interactions //! //! Delta Kernel needs to perform some basic operations against file systems like listing and -//! reading files. These interactions are encapsulated in the [`FileSystemClient`] trait. -//! Implementors must take care that all assumptions on the behavior if the functions - like sorted +//! reading files. These interactions are encapsulated in the [`StorageHandler`] trait. +//! Implementers must take care that all assumptions on the behavior if the functions - like sorted //! results - are respected. //! //! ## Reading log and data files @@ -51,7 +51,7 @@ //! Delta Kernel requires the capability to read and write json files and read parquet files, which //! is exposed via the [`JsonHandler`] and [`ParquetHandler`] respectively. When reading files, //! connectors are asked to provide the context information it requires to execute the actual -//! operation. This is done by invoking methods on the [`FileSystemClient`] trait. +//! operation. This is done by invoking methods on the [`StorageHandler`] trait. #![cfg_attr(all(doc, NIGHTLY_CHANNEL), feature(doc_auto_cfg))] #![warn( @@ -74,6 +74,7 @@ use url::Url; use self::schema::{DataType, SchemaRef}; pub mod actions; +mod checkpoint; pub mod engine_data; pub mod error; pub mod expressions; @@ -87,7 +88,9 @@ pub mod table_features; pub mod table_properties; pub mod transaction; -pub(crate) mod predicates; +pub mod arrow; +pub(crate) mod kernel_predicates; +pub mod parquet; pub(crate) mod utils; #[cfg(feature = "developer-visibility")] @@ -95,6 +98,11 @@ pub mod path; #[cfg(not(feature = "developer-visibility"))] pub(crate) mod path; +#[cfg(feature = "developer-visibility")] +pub mod log_replay; +#[cfg(not(feature = "developer-visibility"))] +pub(crate) mod log_replay; + #[cfg(feature = "developer-visibility")] pub mod log_segment; #[cfg(not(feature = "developer-visibility"))] @@ -106,6 +114,10 @@ pub use error::{DeltaResult, Error}; pub use expressions::{Expression, ExpressionRef}; pub use table::Table; +use expressions::literal_expression_transform::LiteralExpressionTransform; +use expressions::Scalar; +use schema::{SchemaTransform, StructField, StructType}; + #[cfg(any( feature = "default-engine", feature = "sync-engine", @@ -320,7 +332,7 @@ pub trait ExpressionEvaluator: AsAny { /// /// Delta Kernel can use this handler to evaluate predicate on partition filters, /// fill up partition column values and any computation on data using Expressions. -pub trait ExpressionHandler: AsAny { +pub trait EvaluationHandler: AsAny { /// Create an [`ExpressionEvaluator`] that can evaluate the given [`Expression`] /// on columnar batches with the given [`Schema`] to produce data of [`DataType`]. 
/// @@ -332,22 +344,61 @@ pub trait ExpressionHandler: AsAny { /// /// [`Schema`]: crate::schema::StructType /// [`DataType`]: crate::schema::DataType - fn get_evaluator( + fn new_expression_evaluator( &self, schema: SchemaRef, expression: Expression, output_type: DataType, ) -> Arc; + + /// Create a single-row all-null-value [`EngineData`] with the schema specified by + /// `output_schema`. + // NOTE: we should probably allow DataType instead of SchemaRef, but can expand that in the + // future. + fn null_row(&self, output_schema: SchemaRef) -> DeltaResult>; } +/// Internal trait to allow us to have a private `create_one` API that's implemented for all +/// EvaluationHandlers. +// For some reason rustc doesn't detect it's usage so we allow(dead_code) here... +#[allow(dead_code)] +trait EvaluationHandlerExtension: EvaluationHandler { + /// Create a single-row [`EngineData`] by applying the given schema to the leaf-values given in + /// `values`. + // Note: we will stick with a Schema instead of DataType (more constrained can expand in + // future) + fn create_one(&self, schema: SchemaRef, values: &[Scalar]) -> DeltaResult> { + // just get a single int column (arbitrary) + let null_row_schema = Arc::new(StructType::new(vec![StructField::nullable( + "null_col", + DataType::INTEGER, + )])); + let null_row = self.null_row(null_row_schema.clone())?; + + // Convert schema and leaf values to an expression + let mut schema_transform = LiteralExpressionTransform::new(values); + schema_transform.transform_struct(schema.as_ref()); + let row_expr = schema_transform.try_into_expr()?; + + let eval = self.new_expression_evaluator(null_row_schema, row_expr, schema.into()); + eval.evaluate(null_row.as_ref()) + } +} + +// Auto-implement the extension trait for all EvaluationHandlers +impl EvaluationHandlerExtension for T {} + /// Provides file system related functionalities to Delta Kernel. /// -/// Delta Kernel uses this client whenever it needs to access the underlying +/// Delta Kernel uses this handler whenever it needs to access the underlying /// file system where the Delta table is present. Connector implementation of /// this trait can hide filesystem specific details from Delta Kernel. -pub trait FileSystemClient: AsAny { - /// List the paths in the same directory that are lexicographically greater or equal to +pub trait StorageHandler: AsAny { + /// List the paths in the same directory that are lexicographically greater than /// (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + /// + /// If the path is directory-like (ends with '/'), the result should contain + /// all the files in the directory. fn list_from(&self, path: &Url) -> DeltaResult>>>; @@ -360,7 +411,7 @@ pub trait FileSystemClient: AsAny { /// Provides JSON handling functionality to Delta Kernel. /// -/// Delta Kernel can use this client to parse JSON strings into Row or read content from JSON files. +/// Delta Kernel can use this handler to parse JSON strings into Row or read content from JSON files. /// Connectors can leverage this trait to provide their best implementation of the JSON parsing /// capability to Delta Kernel. pub trait JsonHandler: AsAny { @@ -457,17 +508,17 @@ pub trait ParquetHandler: AsAny { /// Engines/Connectors are expected to pass an implementation of this trait when reading a Delta /// table. pub trait Engine: AsAny { - /// Get the connector provided [`ExpressionHandler`]. - fn get_expression_handler(&self) -> Arc; + /// Get the connector provided [`EvaluationHandler`]. 
+ fn evaluation_handler(&self) -> Arc; - /// Get the connector provided [`FileSystemClient`] - fn get_file_system_client(&self) -> Arc; + /// Get the connector provided [`StorageHandler`] + fn storage_handler(&self) -> Arc; /// Get the connector provided [`JsonHandler`]. - fn get_json_handler(&self) -> Arc; + fn json_handler(&self) -> Arc; /// Get the connector provided [`ParquetHandler`]. - fn get_parquet_handler(&self) -> Arc; + fn parquet_handler(&self) -> Arc; } // we have an 'internal' feature flag: default-engine-base, which is actually just the shared diff --git a/kernel/src/log_replay.rs b/kernel/src/log_replay.rs new file mode 100644 index 000000000..c9a58492f --- /dev/null +++ b/kernel/src/log_replay.rs @@ -0,0 +1,308 @@ +//! This module provides log replay utilities. +//! +//! Log replay is the process of transforming an iterator of action batches (read from Delta +//! transaction logs) into an iterator of filtered/transformed actions for specific use cases. +//! The logs, which record all table changes as JSON entries, are processed batch by batch, +//! typically from newest to oldest. +//! +//! Log replay is currently implemented for table scans, which filter and apply transformations +//! to produce file actions which builds the view of the table state at a specific point in time. +//! Future extensions will support additional log replay processors beyond the current use case. +//! (e.g. checkpointing: filter actions to include only those needed to rebuild table state) +//! +//! This module provides structures for efficient batch processing, focusing on file action +//! deduplication with `FileActionDeduplicator` which tracks unique files across log batches +//! to minimize memory usage for tables with extensive history. + +use std::collections::HashSet; + +use crate::actions::deletion_vector::DeletionVectorDescriptor; +use crate::engine_data::{GetData, TypedGetData}; +use crate::scan::data_skipping::DataSkippingFilter; +use crate::{DeltaResult, EngineData}; + +use tracing::debug; + +/// The subset of file action fields that uniquely identifies it in the log, used for deduplication +/// of adds and removes during log replay. +#[derive(Debug, Hash, Eq, PartialEq)] +pub(crate) struct FileActionKey { + pub(crate) path: String, + pub(crate) dv_unique_id: Option, +} +impl FileActionKey { + pub(crate) fn new(path: impl Into, dv_unique_id: Option) -> Self { + let path = path.into(); + Self { path, dv_unique_id } + } +} + +/// Maintains state and provides functionality for deduplicating file actions during log replay. +/// +/// This struct is embedded in visitors to track which files have been seen across multiple +/// log batches. Since logs are processed newest-to-oldest, this deduplicator ensures that each +/// unique file (identified by path and deletion vector ID) is processed only once. Performing +/// deduplication at the visitor level avoids having to load all actions into memory at once, +/// significantly reducing memory usage for large Delta tables with extensive history. +/// +/// TODO: Modify deduplication to track only file paths instead of (path, dv_unique_id). +/// More info here: https://github.com/delta-io/delta-kernel-rs/issues/701 +pub(crate) struct FileActionDeduplicator<'seen> { + /// A set of (data file path, dv_unique_id) pairs that have been seen thus + /// far in the log for deduplication. This is a mutable reference to the set + /// of seen file keys that persists across multiple log batches. 
+ seen_file_keys: &'seen mut HashSet, + // TODO: Consider renaming to `is_commit_batch`, `deduplicate_batch`, or `save_batch` + // to better reflect its role in deduplication logic. + /// Whether we're processing a log batch (as opposed to a checkpoint) + is_log_batch: bool, + /// Index of the getter containing the add.path column + add_path_index: usize, + /// Index of the getter containing the remove.path column + remove_path_index: usize, + /// Starting index for add action deletion vector columns + add_dv_start_index: usize, + /// Starting index for remove action deletion vector columns + remove_dv_start_index: usize, +} + +impl<'seen> FileActionDeduplicator<'seen> { + pub(crate) fn new( + seen_file_keys: &'seen mut HashSet, + is_log_batch: bool, + add_path_index: usize, + remove_path_index: usize, + add_dv_start_index: usize, + remove_dv_start_index: usize, + ) -> Self { + Self { + seen_file_keys, + is_log_batch, + add_path_index, + remove_path_index, + add_dv_start_index, + remove_dv_start_index, + } + } + + /// Checks if log replay already processed this logical file (in which case the current action + /// should be ignored). If not already seen, register it so we can recognize future duplicates. + /// Returns `true` if we have seen the file and should ignore it, `false` if we have not seen it + /// and should process it. + pub(crate) fn check_and_record_seen(&mut self, key: FileActionKey) -> bool { + // Note: each (add.path + add.dv_unique_id()) pair has a + // unique Add + Remove pair in the log. For example: + // https://github.com/delta-io/delta/blob/master/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json + + if self.seen_file_keys.contains(&key) { + debug!( + "Ignoring duplicate ({}, {:?}) in scan, is log {}", + key.path, key.dv_unique_id, self.is_log_batch + ); + true + } else { + debug!( + "Including ({}, {:?}) in scan, is log {}", + key.path, key.dv_unique_id, self.is_log_batch + ); + if self.is_log_batch { + // Remember file actions from this batch so we can ignore duplicates as we process + // batches from older commit and/or checkpoint files. We don't track checkpoint + // batches because they are already the oldest actions and never replace anything. + self.seen_file_keys.insert(key); + } + false + } + } + + /// Extracts the deletion vector unique ID if it exists. + /// + /// This function retrieves the necessary fields for constructing a deletion vector unique ID + /// by accessing `getters` at `dv_start_index` and the following two indices. Specifically: + /// - `dv_start_index` retrieves the storage type (`deletionVector.storageType`). + /// - `dv_start_index + 1` retrieves the path or inline deletion vector (`deletionVector.pathOrInlineDv`). + /// - `dv_start_index + 2` retrieves the optional offset (`deletionVector.offset`). + fn extract_dv_unique_id<'a>( + &self, + i: usize, + getters: &[&'a dyn GetData<'a>], + dv_start_index: usize, + ) -> DeltaResult> { + match getters[dv_start_index].get_opt(i, "deletionVector.storageType")? { + Some(storage_type) => { + let path_or_inline = + getters[dv_start_index + 1].get(i, "deletionVector.pathOrInlineDv")?; + let offset = getters[dv_start_index + 2].get_opt(i, "deletionVector.offset")?; + + Ok(Some(DeletionVectorDescriptor::unique_id_from_parts( + storage_type, + path_or_inline, + offset, + ))) + } + None => Ok(None), + } + } + + /// Extracts a file action key and determines if it's an add operation. 
+ /// This method examines the data at the given index using the provided getters + /// to identify whether a file action exists and what type it is. + /// + /// # Arguments + /// + /// * `i` - Index position in the data structure to examine + /// * `getters` - Collection of data getter implementations used to access the data + /// * `skip_removes` - Whether to skip remove actions when extracting file actions + /// + /// # Returns + /// + /// * `Ok(Some((key, is_add)))` - When a file action is found, returns the key and whether it's an add operation + /// * `Ok(None)` - When no file action is found + /// * `Err(...)` - On any error during extraction + pub(crate) fn extract_file_action<'a>( + &self, + i: usize, + getters: &[&'a dyn GetData<'a>], + skip_removes: bool, + ) -> DeltaResult> { + // Try to extract an add action by the required path column + if let Some(path) = getters[self.add_path_index].get_str(i, "add.path")? { + let dv_unique_id = self.extract_dv_unique_id(i, getters, self.add_dv_start_index)?; + return Ok(Some((FileActionKey::new(path, dv_unique_id), true))); + } + + // The AddRemoveDedupVisitor skips remove actions when extracting file actions from a checkpoint batch. + if skip_removes { + return Ok(None); + } + + // Try to extract a remove action by the required path column + if let Some(path) = getters[self.remove_path_index].get_str(i, "remove.path")? { + let dv_unique_id = self.extract_dv_unique_id(i, getters, self.remove_dv_start_index)?; + return Ok(Some((FileActionKey::new(path, dv_unique_id), false))); + } + + // No file action found + Ok(None) + } + + /// Returns whether we are currently processing a log batch. + /// + /// `true` indicates we are processing a batch from a commit file. + /// `false` indicates we are processing a batch from a checkpoint. + pub(crate) fn is_log_batch(&self) -> bool { + self.is_log_batch + } +} + +/// A trait for processing batches of actions from Delta transaction logs during log replay. +/// +/// Log replay processors scan transaction logs in **reverse chronological order** (newest to oldest), +/// filtering and transforming action batches into specialized output types. These processors: +/// +/// - **Track and deduplicate file actions** to apply appropriate `Remove` actions to corresponding +/// `Add` actions (and omit the file from the log replay output) +/// - **Maintain selection vectors** to indicate which actions in each batch should be included. +/// - **Apply custom filtering logic** based on the processorโ€™s purpose (e.g., checkpointing, scanning). +/// - **Data skipping** filters are applied to the initial selection vector to reduce the number of rows +/// processed by the processor, (if a filter is provided). +/// +/// Implementations: +/// - `ScanLogReplayProcessor`: Used for table scans, this processor filters and selects deduplicated +/// `Add` actions from log batches to reconstruct the view of the table at a specific point in time. +/// Note that scans do not expose `Remove` actions. Data skipping may be applied when a predicate is +/// provided. +/// +/// - `CheckpointLogReplayProcessor` (WIP): Will be responsible for processing log batches to construct +/// V1 spec checkpoint files. Unlike scans, checkpoint processing includes additional actions, such as +/// `Remove`, `Metadata`, and `Protocol`, required to fully reconstruct table state. +/// Data skipping is not applied during checkpoint processing. 
+/// +/// The `Output` type represents the material result of log replay, and it must implement the +/// `HasSelectionVector` trait to allow filtering of irrelevant rows: +/// +/// - For **scans**, the output type is `ScanMetadata`, which contains the file actions (`Add` +/// actions) that need to be applied to build the table's view, accompanied by a +/// **selection vector** that identifies which rows should be included. A transform vector may +/// also be included to handle schema changes, such as renaming columns or modifying data types. +/// +/// - For **checkpoints**, the output includes the actions necessary to write to the checkpoint file (`Add`, +/// `Remove`, `Metadata`, `Protocol` actions), filtered by the **selection vector** to determine which +/// rows are included in the final checkpoint. +/// +/// TODO: Refactor the Change Data Feed (CDF) processor to use this trait. +pub(crate) trait LogReplayProcessor: Sized { + /// The type of results produced by this processor must implement the + /// `HasSelectionVector` trait to allow filtering out batches with no selected rows. + type Output: HasSelectionVector; + + /// Processes a batch of actions and returns the filtered results. + /// + /// # Arguments + /// - `actions_batch` - A reference to an [`EngineData`] instance representing a batch of actions. + /// - `is_log_batch` - `true` if the batch originates from a commit log, `false` if from a checkpoint. + /// + /// Returns a [`DeltaResult`] containing the processorโ€™s output, which includes only selected actions. + /// + /// Note: Since log replay is stateful, processing may update internal processor state (e.g., deduplication sets). + fn process_actions_batch( + &mut self, + actions_batch: &dyn EngineData, + is_log_batch: bool, + ) -> DeltaResult; + + /// Applies the processor to an actions iterator and filters out empty results. + /// + /// # Arguments + /// * `action_iter` - Iterator of action batches and their source flags + /// + /// Returns an iterator that yields the Output type of the processor. + fn process_actions_iter( + mut self, + action_iter: impl Iterator, bool)>>, + ) -> impl Iterator> { + action_iter + .map(move |action_res| { + let (batch, is_log_batch) = action_res?; + self.process_actions_batch(batch.as_ref(), is_log_batch) + }) + .filter(|res| { + // TODO: Leverage .is_none_or() when msrv = 1.82 + res.as_ref() + .map_or(true, |result| result.has_selected_rows()) + }) + } + + /// Builds the initial selection vector for the action batch, used to filter out rows that + /// are not relevant to the current processor's purpose (e.g., checkpointing, scanning). + /// This method performs a first pass of filtering using an optional [`DataSkippingFilter`]. + /// If no filter is provided, it assumes that all rows should be selected. + /// + /// The selection vector is further updated based on the processor's logic in the + /// `process_actions_batch` method. + /// + /// # Arguments + /// - `batch` - A reference to the batch of actions to be processed. + /// + /// # Returns + /// A `DeltaResult>`, where each boolean indicates if the corresponding row should be included. + /// If no filter is provided, all rows are selected. 
+ fn build_selection_vector(&self, batch: &dyn EngineData) -> DeltaResult> { + match self.data_skipping_filter() { + Some(filter) => filter.apply(batch), + None => Ok(vec![true; batch.len()]), // If no filter is provided, select all rows + } + } + + /// Returns an optional reference to the [`DataSkippingFilter`] used to filter rows + /// when building the initial selection vector in `build_selection_vector`. + /// If `None` is returned, no filter is applied, and all rows are selected. + fn data_skipping_filter(&self) -> Option<&DataSkippingFilter>; +} + +/// This trait is used to determine if a processor's output contains any selected rows. +/// This is used to filter out batches with no selected rows from the log replay results. +pub(crate) trait HasSelectionVector { + /// Check if the selection vector contains at least one selected row + fn has_selected_rows(&self) -> bool; +} diff --git a/kernel/src/log_segment.rs b/kernel/src/log_segment.rs index b4f255c57..e55e1791a 100644 --- a/kernel/src/log_segment.rs +++ b/kernel/src/log_segment.rs @@ -1,13 +1,18 @@ //! Represents a segment of a delta log. [`LogSegment`] wraps a set of checkpoint and commit //! files. -use crate::actions::{get_log_schema, Metadata, Protocol, METADATA_NAME, PROTOCOL_NAME}; +use crate::actions::visitors::SidecarVisitor; +use crate::actions::{ + get_log_schema, Metadata, Protocol, ADD_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; use crate::path::{LogPathFileType, ParsedLogPath}; use crate::schema::SchemaRef; -use crate::snapshot::CheckpointMetadata; +use crate::snapshot::LastCheckpointHint; use crate::utils::require; use crate::{ - DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, FileSystemClient, Version, + DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, ParquetHandler, RowVisitor, + StorageHandler, Version, }; use itertools::Itertools; use std::collections::HashMap; @@ -32,10 +37,11 @@ mod tests; /// and in `TableChanges` when built with [`LogSegment::for_table_changes`]. /// /// [`Snapshot`]: crate::snapshot::Snapshot -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct LogSegment { pub end_version: Version, + pub checkpoint_version: Option, pub log_root: Url, /// Sorted commit files in the log segment (ascending) pub ascending_commit_files: Vec, @@ -44,12 +50,18 @@ pub(crate) struct LogSegment { } impl LogSegment { - fn try_new( - ascending_commit_files: Vec, + pub(crate) fn try_new( + mut ascending_commit_files: Vec, checkpoint_parts: Vec, log_root: Url, end_version: Option, ) -> DeltaResult { + // Commit file versions must be greater than the most recent checkpoint version if it exists + let checkpoint_version = checkpoint_parts.first().map(|checkpoint_file| { + ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); + checkpoint_file.version + }); + // We require that commits that are contiguous. In other words, there must be no gap between commit versions. require!( ascending_commit_files @@ -63,35 +75,37 @@ impl LogSegment { // There must be no gap between a checkpoint and the first commit version. Note that // that all checkpoint parts share the same version. 
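To make the `LogReplayProcessor` and `HasSelectionVector` contract concrete, here is a deliberately minimal processor that selects every row and applies no data skipping. It is an illustrative sketch against the trait signatures in this patch, not code the patch adds.

```rust
use crate::log_replay::{HasSelectionVector, LogReplayProcessor};
use crate::scan::data_skipping::DataSkippingFilter;
use crate::{DeltaResult, EngineData};

struct SelectAllProcessor;

struct SelectAllOutput {
    selection_vector: Vec<bool>,
}

impl HasSelectionVector for SelectAllOutput {
    fn has_selected_rows(&self) -> bool {
        self.selection_vector.contains(&true)
    }
}

impl LogReplayProcessor for SelectAllProcessor {
    type Output = SelectAllOutput;

    fn process_actions_batch(
        &mut self,
        actions_batch: &dyn EngineData,
        _is_log_batch: bool,
    ) -> DeltaResult<Self::Output> {
        // With no data-skipping filter (see below), this is vec![true; actions_batch.len()].
        let selection_vector = self.build_selection_vector(actions_batch)?;
        Ok(SelectAllOutput { selection_vector })
    }

    fn data_skipping_filter(&self) -> Option<&DataSkippingFilter> {
        None
    }
}
```

A real processor would also embed a `FileActionDeduplicator`; its behavior amounts to "first sighting wins", as this small sketch shows (the getter index arguments are illustrative and depend on the visitor's column layout).

```rust
use std::collections::HashSet;

use crate::log_replay::{FileActionDeduplicator, FileActionKey};

fn dedup_sketch() {
    let mut seen = HashSet::new();
    let mut dedup = FileActionDeduplicator::new(&mut seen, /*is_log_batch*/ true, 0, 4, 1, 5);
    // First sighting: not seen yet, so the action is processed and recorded.
    assert!(!dedup.check_and_record_seen(FileActionKey::new("part-00001.parquet", None)));
    // Same (path, dv id) in an older batch: recognized as a duplicate and ignored.
    assert!(dedup.check_and_record_seen(FileActionKey::new("part-00001.parquet", None)));
}
```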
- if let (Some(checkpoint_file), Some(commit_file)) = - (checkpoint_parts.first(), ascending_commit_files.first()) + if let (Some(checkpoint_version), Some(commit_file)) = + (checkpoint_version, ascending_commit_files.first()) { require!( - checkpoint_file.version + 1 == commit_file.version, + checkpoint_version + 1 == commit_file.version, Error::InvalidCheckpoint(format!( "Gap between checkpoint version {} and next commit {}", - checkpoint_file.version, commit_file.version, + checkpoint_version, commit_file.version, )) ) } // Get the effective version from chosen files - let version_eff = ascending_commit_files + let effective_version = ascending_commit_files .last() .or(checkpoint_parts.first()) .ok_or(Error::generic("No files in log segment"))? .version; if let Some(end_version) = end_version { require!( - version_eff == end_version, + effective_version == end_version, Error::generic(format!( "LogSegment end version {} not the same as the specified end version {}", - version_eff, end_version + effective_version, end_version )) ); } + Ok(LogSegment { - end_version: version_eff, + end_version: effective_version, + checkpoint_version, log_root, ascending_commit_files, checkpoint_parts, @@ -104,35 +118,28 @@ impl LogSegment { /// parts. All these parts will have the same checkpoint version. /// /// The options for constructing a LogSegment for Snapshot are as follows: - /// - `checkpoint_hint`: a `CheckpointMetadata` to start the log segment from (e.g. from reading the `last_checkpoint` file). + /// - `checkpoint_hint`: a `LastCheckpointHint` to start the log segment from (e.g. from reading the `last_checkpoint` file). /// - `time_travel_version`: The version of the log that the Snapshot will be at. /// /// [`Snapshot`]: crate::snapshot::Snapshot #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_snapshot( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: Url, - checkpoint_hint: impl Into>, + checkpoint_hint: impl Into>, time_travel_version: impl Into>, ) -> DeltaResult { let time_travel_version = time_travel_version.into(); - let (mut ascending_commit_files, checkpoint_parts) = + let (ascending_commit_files, checkpoint_parts) = match (checkpoint_hint.into(), time_travel_version) { - (Some(cp), None) => { - list_log_files_with_checkpoint(&cp, fs_client, &log_root, None)? - } + (Some(cp), None) => list_log_files_with_checkpoint(&cp, storage, &log_root, None)?, (Some(cp), Some(end_version)) if cp.version <= end_version => { - list_log_files_with_checkpoint(&cp, fs_client, &log_root, Some(end_version))? + list_log_files_with_checkpoint(&cp, storage, &log_root, Some(end_version))? } - _ => list_log_files_with_version(fs_client, &log_root, None, time_travel_version)?, + _ => list_log_files_with_version(storage, &log_root, None, time_travel_version)?, }; - // Commit file versions must be greater than the most recent checkpoint version if it exists - if let Some(checkpoint_file) = checkpoint_parts.first() { - ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); - } - LogSegment::try_new( ascending_commit_files, checkpoint_parts, @@ -147,7 +154,7 @@ impl LogSegment { /// is specified it will be the most recent version by default. 
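The commit-retention and contiguity rules now enforced inside `try_new` are easiest to see with concrete version numbers. A plain-Rust sketch, with kernel types omitted:

```rust
fn main() {
    // Checkpoint at version 5; the listing returned commits 3 through 8.
    let checkpoint_version = Some(5u64);
    let mut commit_versions = vec![3u64, 4, 5, 6, 7, 8];

    // Keep only commits strictly newer than the checkpoint (the retain in try_new).
    if let Some(cp) = checkpoint_version {
        commit_versions.retain(|v| cp < *v);
    }
    assert_eq!(commit_versions, vec![6, 7, 8]);

    // Commits must be contiguous (no version gaps)...
    assert!(commit_versions.windows(2).all(|w| w[0] + 1 == w[1]));
    // ...and the first retained commit must directly follow the checkpoint.
    assert_eq!(commit_versions.first(), Some(&6));
    // The effective end version is the newest commit (or the checkpoint if there are none).
    assert_eq!(*commit_versions.last().unwrap(), 8);
}
```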
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_table_changes( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: Url, start_version: Version, end_version: impl Into>, @@ -162,7 +169,7 @@ impl LogSegment { } let ascending_commit_files: Vec<_> = - list_log_files(fs_client, &log_root, start_version, end_version)? + list_log_files(storage, &log_root, start_version, end_version)? .filter_ok(|x| x.is_commit()) .try_collect()?; @@ -181,19 +188,21 @@ impl LogSegment { ); LogSegment::try_new(ascending_commit_files, vec![], log_root, end_version) } - /// Read a stream of log data from this log segment. + + /// Read a stream of actions from this log segment. This returns an iterator of (EngineData, + /// bool) pairs, where the boolean flag indicates whether the data was read from a commit file + /// (true) or a checkpoint file (false). /// /// The log files will be read from most recent to oldest. - /// The boolean flags indicates whether the data was read from - /// a commit file (true) or a checkpoint file (false). /// - /// `read_schema` is the schema to read the log files with. This can be used - /// to project the log files to a subset of the columns. + /// `commit_read_schema` is the (physical) schema to read the commit files with, and + /// `checkpoint_read_schema` is the (physical) schema to read checkpoint files with. This can be + /// used to project the log files to a subset of the columns. /// /// `meta_predicate` is an optional expression to filter the log files with. It is _NOT_ the /// query's predicate, but rather a predicate for filtering log files themselves. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn replay( + pub(crate) fn read_actions( &self, engine: &dyn Engine, commit_read_schema: SchemaRef, @@ -209,25 +218,158 @@ impl LogSegment { .map(|f| f.location.clone()) .collect(); let commit_stream = engine - .get_json_handler() + .json_handler() .read_json_files(&commit_files, commit_read_schema, meta_predicate.clone())? .map_ok(|batch| (batch, true)); - let checkpoint_parts: Vec<_> = self + let checkpoint_stream = + self.create_checkpoint_stream(engine, checkpoint_read_schema, meta_predicate)?; + + Ok(commit_stream.chain(checkpoint_stream)) + } + + /// Returns an iterator over checkpoint data, processing sidecar files when necessary. + /// + /// By default, `create_checkpoint_stream` checks for the presence of sidecar files, and + /// reads their contents if present. Checking for sidecar files is skipped if: + /// - The checkpoint is a multi-part checkpoint + /// - The checkpoint read schema does not contain a file action + /// + /// For single-part checkpoints, any referenced sidecar files are processed. These + /// sidecar files contain the actual file actions that would otherwise be + /// stored directly in the checkpoint. The sidecar file batches are chained to the + /// checkpoint batch in the top level iterator to be returned. 
+ fn create_checkpoint_stream( + &self, + engine: &dyn Engine, + checkpoint_read_schema: SchemaRef, + meta_predicate: Option, + ) -> DeltaResult, bool)>> + Send> { + let need_file_actions = checkpoint_read_schema.contains(ADD_NAME) + || checkpoint_read_schema.contains(REMOVE_NAME); + require!( + !need_file_actions || checkpoint_read_schema.contains(SIDECAR_NAME), + Error::invalid_checkpoint( + "If the checkpoint read schema contains file actions, it must contain the sidecar column" + ) + ); + + let checkpoint_file_meta: Vec<_> = self .checkpoint_parts .iter() .map(|f| f.location.clone()) .collect(); - let checkpoint_stream = engine - .get_parquet_handler() - .read_parquet_files(&checkpoint_parts, checkpoint_read_schema, meta_predicate)? - .map_ok(|batch| (batch, false)); - Ok(commit_stream.chain(checkpoint_stream)) + let parquet_handler = engine.parquet_handler(); + + // Historically, we had a shared file reader trait for JSON and Parquet handlers, + // but it was removed to avoid unnecessary coupling. This is a concrete case + // where it *could* have been useful, but for now, we're keeping them separate. + // If similar patterns start appearing elsewhere, we should reconsider that decision. + let actions = match self.checkpoint_parts.first() { + Some(parsed_log_path) if parsed_log_path.extension == "json" => { + engine.json_handler().read_json_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? + } + Some(parsed_log_path) if parsed_log_path.extension == "parquet" => parquet_handler + .read_parquet_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )?, + Some(parsed_log_path) => { + return Err(Error::generic(format!( + "Unsupported checkpoint file type: {}", + parsed_log_path.extension, + ))); + } + // This is the case when there are no checkpoints in the log segment + // so we return an empty iterator + None => Box::new(std::iter::empty()), + }; + + let log_root = self.log_root.clone(); + + let actions_iter = actions + .map(move |checkpoint_batch_result| -> DeltaResult<_> { + let checkpoint_batch = checkpoint_batch_result?; + // This closure maps the checkpoint batch to an iterator of batches + // by chaining the checkpoint batch with sidecar batches if they exist. + + // 1. In the case where the schema does not contain file actions, we return the + // checkpoint batch directly as sidecar files only have to be read when the + // schema contains add/remove action. + // 2. Multi-part checkpoint batches never have sidecar actions, so the batch is + // returned as-is. + let sidecar_content = if need_file_actions && checkpoint_file_meta.len() == 1 { + Self::process_sidecars( + parquet_handler.clone(), // cheap Arc clone + log_root.clone(), + checkpoint_batch.as_ref(), + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? + } else { + None + }; + + let combined_batches = std::iter::once(Ok(checkpoint_batch)) + .chain(sidecar_content.into_iter().flatten()) + // The boolean flag indicates whether the batch originated from a commit file + // (true) or a checkpoint file (false). + .map_ok(|sidecar_batch| (sidecar_batch, false)); + + Ok(combined_batches) + }) + .flatten_ok() + .map(|result| result?); // result-result to result + + Ok(actions_iter) } - // Get the most up-to-date Protocol and Metadata actions - pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> { + /// Processes sidecar files for the given checkpoint batch. 
+    ///
+    /// This function extracts any sidecar file references from the provided batch.
+    /// Each sidecar file is read and an iterator of file action batches is returned.
+    fn process_sidecars(
+        parquet_handler: Arc<dyn ParquetHandler>,
+        log_root: Url,
+        batch: &dyn EngineData,
+        checkpoint_read_schema: SchemaRef,
+        meta_predicate: Option<ExpressionRef>,
+    ) -> DeltaResult<Option<impl Iterator<Item = DeltaResult<Box<dyn EngineData>>> + Send>> {
+        // Visit the rows of the checkpoint batch to extract sidecar file references
+        let mut visitor = SidecarVisitor::default();
+        visitor.visit_rows_of(batch)?;
+
+        // If there are no sidecar files, return early
+        if visitor.sidecars.is_empty() {
+            return Ok(None);
+        }
+
+        let sidecar_files: Vec<_> = visitor
+            .sidecars
+            .iter()
+            .map(|sidecar| sidecar.to_filemeta(&log_root))
+            .try_collect()?;
+
+        // Read the sidecar files and return an iterator of sidecar file batches
+        Ok(Some(parquet_handler.read_parquet_files(
+            &sidecar_files,
+            checkpoint_read_schema,
+            meta_predicate,
+        )?))
+    }
+
+    // Do a lightweight protocol+metadata log replay to find the latest Protocol and Metadata in
+    // the LogSegment
+    pub(crate) fn protocol_and_metadata(
+        &self,
+        engine: &dyn Engine,
+    ) -> DeltaResult<(Option<Metadata>, Option<Protocol>)> {
         let data_batches = self.replay_for_metadata(engine)?;
         let (mut metadata_opt, mut protocol_opt) = (None, None);
         for batch in data_batches {
@@ -243,7 +385,12 @@ impl LogSegment {
                 break;
             }
         }
-        match (metadata_opt, protocol_opt) {
+        Ok((metadata_opt, protocol_opt))
+    }
+
+    // Get the most up-to-date Protocol and Metadata actions
+    pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> {
+        match self.protocol_and_metadata(engine)? {
             (Some(m), Some(p)) => Ok((m, p)),
             (None, Some(_)) => Err(Error::MissingMetadata),
             (Some(_), None) => Err(Error::MissingProtocol),
@@ -265,7 +412,7 @@ impl LogSegment {
             )))
         });
         // read the same protocol and metadata schema for both commits and checkpoints
-        self.replay(engine, schema.clone(), schema, META_PREDICATE.clone())
+        self.read_actions(engine, schema.clone(), schema, META_PREDICATE.clone())
     }
 }
@@ -274,9 +421,9 @@ impl LogSegment {
 /// not specified, the files will begin from version number 0. If `end_version` is not specified, files up to
 /// the most recent version will be included.
 ///
-/// Note: this calls [`FileSystemClient::list_from`] to get the list of log files.
+/// Note: this calls [`StorageHandler::list_from`] to get the list of log files.
 fn list_log_files(
-    fs_client: &dyn FileSystemClient,
+    storage: &dyn StorageHandler,
     log_root: &Url,
     start_version: impl Into<Option<Version>>,
     end_version: impl Into<Option<Version>>,
@@ -286,7 +433,7 @@ fn list_log_files(
     let version_prefix = format!("{:020}", start_version);
     let start_from = log_root.join(&version_prefix)?;
-    Ok(fs_client
+    Ok(storage
         .list_from(&start_from)?
         .map(|meta| ParsedLogPath::try_from(meta?))
         // TODO this filters out .crc files etc which start with "." - how do we want to use these kind of files?
@@ -296,13 +443,17 @@ fn list_log_files(
             Err(_) => true,
         }))
 }
+
 /// List all commit and checkpoint files with versions above the provided `start_version` (inclusive).
 /// If successful, this returns a tuple `(ascending_commit_files, checkpoint_parts)` of type
 /// `(Vec<ParsedLogPath>, Vec<ParsedLogPath>)`. The commit files are guaranteed to be sorted in
 /// ascending order by version. The elements of `checkpoint_parts` are all the parts of the same
 /// checkpoint. Checkpoint parts share the same version.
-fn list_log_files_with_version(
-    fs_client: &dyn FileSystemClient,
+// TODO: encode some of these guarantees in the output types. e.g.
we could have: +// - SortedCommitFiles: Vec, is_ascending: bool, end_version: Version +// - CheckpointParts: Vec, checkpoint_version: Version (guarantee all same version) +pub(crate) fn list_log_files_with_version( + storage: &dyn StorageHandler, log_root: &Url, start_version: Option, end_version: Option, @@ -310,7 +461,7 @@ fn list_log_files_with_version( // We expect 10 commit files per checkpoint, so start with that size. We could adjust this based // on config at some point - let log_files = list_log_files(fs_client, log_root, start_version, end_version)?; + let log_files = list_log_files(storage, log_root, start_version, end_version)?; log_files.process_results(|iter| { let mut commit_files = Vec::with_capacity(10); @@ -399,13 +550,13 @@ fn group_checkpoint_parts(parts: Vec) -> HashMap, ) -> DeltaResult<(Vec, Vec)> { let (commit_files, checkpoint_parts) = list_log_files_with_version( - fs_client, + storage, log_root, Some(checkpoint_metadata.version), end_version, diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 5db1c4581..f94b7b736 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -1,15 +1,33 @@ +use std::sync::LazyLock; use std::{path::PathBuf, sync::Arc}; +use futures::executor::block_on; use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use url::Url; +use crate::actions::visitors::AddVisitor; +use crate::actions::{ + get_log_add_schema, get_log_schema, Add, Sidecar, ADD_NAME, METADATA_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::executor::tokio::TokioBackgroundExecutor; -use crate::engine::default::filesystem::ObjectStoreFileSystemClient; +use crate::engine::default::filesystem::ObjectStoreStorageHandler; +use crate::engine::default::DefaultEngine; use crate::engine::sync::SyncEngine; use crate::log_segment::LogSegment; -use crate::snapshot::CheckpointMetadata; -use crate::{FileSystemClient, Table}; +use crate::parquet::arrow::ArrowWriter; +use crate::path::ParsedLogPath; +use crate::scan::test_utils::{ + add_batch_simple, add_batch_with_remove, sidecar_batch_with_given_paths, +}; +use crate::snapshot::LastCheckpointHint; +use crate::utils::test_utils::{assert_batch_matches, Action}; +use crate::{ + DeltaResult, Engine, EngineData, Expression, ExpressionRef, FileMeta, RowVisitor, + StorageHandler, Table, +}; use test_utils::delta_path_for_version; // NOTE: In addition to testing the meta-predicate for metadata replay, this test also verifies @@ -63,53 +81,137 @@ fn delta_path_for_multipart_checkpoint(version: u64, part_num: u32, num_parts: u } // Utility method to build a log using a list of log paths and an optional checkpoint hint. The -// CheckpointMetadata is written to `_delta_log/_last_checkpoint`. +// LastCheckpointHint is written to `_delta_log/_last_checkpoint`. 
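+// For reference, the hint is written as a single JSON object; for
+// `LastCheckpointHint { version: 5, size: 10, parts: Some(3), .. }` the file contents look
+// roughly like `{"version":5,"size":10,"parts":3}` (optional fields omitted or null).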
fn build_log_with_paths_and_checkpoint( paths: &[Path], - checkpoint_metadata: Option<&CheckpointMetadata>, -) -> (Box, Url) { + checkpoint_metadata: Option<&LastCheckpointHint>, +) -> (Box, Url) { let store = Arc::new(InMemory::new()); let data = bytes::Bytes::from("kernel-data"); // add log files to store - tokio::runtime::Runtime::new() - .expect("create tokio runtime") - .block_on(async { - for path in paths { - store - .put(path, data.clone().into()) - .await - .expect("put log file in store"); - } - if let Some(checkpoint_metadata) = checkpoint_metadata { - let checkpoint_str = - serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); - store - .put( - &Path::from("_delta_log/_last_checkpoint"), - checkpoint_str.into(), - ) - .await - .expect("Write _last_checkpoint"); - } - }); - - let client = ObjectStoreFileSystemClient::new( + block_on(async { + for path in paths { + store + .put(path, data.clone().into()) + .await + .expect("put log file in store"); + } + if let Some(checkpoint_metadata) = checkpoint_metadata { + let checkpoint_str = + serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); + store + .put( + &Path::from("_delta_log/_last_checkpoint"), + checkpoint_str.into(), + ) + .await + .expect("Write _last_checkpoint"); + } + }); + + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); let table_root = Url::parse("memory:///").expect("valid url"); let log_root = table_root.join("_delta_log/").unwrap(); - (Box::new(client), log_root) + (Box::new(storage), log_root) +} + +// Create an in-memory store and return the store and the URL for the store's _delta_log directory. +fn new_in_memory_store() -> (Arc, Url) { + ( + Arc::new(InMemory::new()), + Url::parse("memory:///") + .unwrap() + .join("_delta_log/") + .unwrap(), + ) +} + +// Writes a record batch obtained from engine data to the in-memory store at a given path. +fn write_parquet_to_store( + store: &Arc, + path: String, + data: Box, +) -> DeltaResult<()> { + let batch = ArrowEngineData::try_from_engine_data(data)?; + let record_batch = batch.record_batch(); + + let mut buffer = vec![]; + let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), None)?; + writer.write(record_batch)?; + writer.close()?; + + block_on(async { store.put(&Path::from(path), buffer.into()).await })?; + + Ok(()) +} + +/// Writes all actions to a _delta_log parquet checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. +pub(crate) fn add_checkpoint_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log/_sidecars file in the store. +/// This function formats the provided filename into the _sidecars subdirectory. +fn add_sidecar_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/_sidecars/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log json checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. 
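+// (Commit and JSON checkpoint files are newline-delimited JSON: one action object per line,
+// e.g. `{"add":{...}}` on one line and `{"remove":{...}}` on the next; `write_json_to_store`
+// below joins the serialized actions with '\n' accordingly.)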
+fn write_json_to_store( + store: &Arc, + actions: Vec, + filename: &str, +) -> DeltaResult<()> { + let json_lines: Vec = actions + .into_iter() + .map(|action| serde_json::to_string(&action).expect("action to string")) + .collect(); + let content = json_lines.join("\n"); + let checkpoint_path = format!("_delta_log/{}", filename); + + tokio::runtime::Runtime::new() + .expect("create tokio runtime") + .block_on(async { + store + .put(&Path::from(checkpoint_path), content.into()) + .await + })?; + + Ok(()) +} + +fn create_log_path(path: &str) -> ParsedLogPath { + ParsedLogPath::try_from(FileMeta { + location: Url::parse(path).expect("Invalid file URL"), + last_modified: 0, + size: 0, + }) + .unwrap() + .unwrap() } #[test] -fn build_snapshot_with_unsupported_uuid_checkpoint() { - let (client, log_root) = build_log_with_paths_and_checkpoint( +fn build_snapshot_with_uuid_checkpoint_parquet() { + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -124,21 +226,90 @@ fn build_snapshot_with_unsupported_uuid_checkpoint() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; assert_eq!(checkpoint_parts.len(), 1); - assert_eq!(checkpoint_parts[0].version, 3); + assert_eq!(checkpoint_parts[0].version, 5); let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); - let expected_versions = vec![4, 5, 6, 7]; + let expected_versions = vec![6, 7]; + assert_eq!(versions, expected_versions); +} + +#[test] +fn build_snapshot_with_uuid_checkpoint_json() { + let (storage, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json"), + delta_path_for_version(6, "json"), + delta_path_for_version(7, "json"), + ], + None, + ); + + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(checkpoint_parts[0].version, 5); + + let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); + let expected_versions = vec![6, 7]; assert_eq!(versions, expected_versions); } +#[test] +fn build_snapshot_with_correct_last_uuid_checkpoint() { + let checkpoint_metadata = LastCheckpointHint { + version: 5, + size: 10, + parts: Some(1), + size_in_bytes: None, + num_of_add_files: None, + checkpoint_schema: None, + checksum: None, + }; + + let (storage, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(1, "json"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(3, "json"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet"), + delta_path_for_version(5, "json"), + delta_path_for_version(6, 
"json"), + delta_path_for_version(7, "json"), + ], + Some(&checkpoint_metadata), + ); + + let log_segment = + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(commit_files.len(), 2); + assert_eq!(checkpoint_parts[0].version, 5); + assert_eq!(commit_files[0].version, 6); + assert_eq!(commit_files[1].version, 7); +} #[test] fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_multipart_checkpoint(1, 1, 3), @@ -162,7 +333,7 @@ fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -176,7 +347,7 @@ fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { #[test] fn build_snapshot_with_out_of_date_last_checkpoint() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -186,7 +357,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -201,7 +372,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -213,7 +384,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint() { } #[test] fn build_snapshot_with_correct_last_multipart_checkpoint() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(3), @@ -223,7 +394,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -243,7 +414,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -256,7 +427,7 @@ fn build_snapshot_with_correct_last_multipart_checkpoint() { #[test] fn build_snapshot_with_missing_checkpoint_part_from_hint_fails() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(3), @@ -266,7 +437,7 @@ fn 
build_snapshot_with_missing_checkpoint_part_from_hint_fails() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -286,12 +457,12 @@ fn build_snapshot_with_missing_checkpoint_part_from_hint_fails() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None); assert!(log_segment.is_err()) } #[test] fn build_snapshot_with_bad_checkpoint_hint_fails() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: Some(1), @@ -301,7 +472,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -320,7 +491,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None); assert!(log_segment.is_err()) } @@ -328,7 +499,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { fn build_snapshot_with_missing_checkpoint_part_no_hint() { // Part 2 of 3 is missing from checkpoint 5. The Snapshot should be made of checkpoint // number 3 and commit files 4 to 7. - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -347,7 +518,7 @@ fn build_snapshot_with_missing_checkpoint_part_no_hint() { None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -365,7 +536,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo // When the _last_checkpoint is out of date and the most recent checkpoint is incomplete, the // Snapshot should be made of the most recent complete checkpoint and the commit files that // follow it. 
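     // (A multi-part checkpoint is only usable when all of its `parts` files are present; an
     // incomplete one is skipped and replay falls back to the newest complete checkpoint.)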
- let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -375,7 +546,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -393,7 +564,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -407,7 +578,7 @@ fn build_snapshot_with_out_of_date_last_checkpoint_and_incomplete_recent_checkpo #[test] fn build_snapshot_without_checkpoints() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -426,7 +597,7 @@ fn build_snapshot_without_checkpoints() { ///////// Specify no checkpoint or end version ///////// let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root.clone(), None, None).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root.clone(), None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -439,7 +610,7 @@ fn build_snapshot_without_checkpoints() { assert_eq!(versions, expected_versions); ///////// Specify only end version ///////// - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, Some(2)).unwrap(); + let log_segment = LogSegment::for_snapshot(storage.as_ref(), log_root, None, Some(2)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -454,7 +625,7 @@ fn build_snapshot_without_checkpoints() { #[test] fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 5, size: 10, parts: None, @@ -463,7 +634,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { checkpoint_schema: None, checksum: None, }; - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -481,7 +652,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -494,7 +665,7 @@ fn build_snapshot_with_checkpoint_greater_than_time_travel_version() { #[test] fn build_snapshot_with_start_checkpoint_and_time_travel_version() { - let checkpoint_metadata = CheckpointMetadata { + let checkpoint_metadata = LastCheckpointHint { version: 3, size: 10, parts: None, @@ -504,7 +675,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { checksum: None, }; - let (client, log_root) = 
build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "checkpoint.parquet"), @@ -519,7 +690,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { ); let log_segment = - LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_metadata, Some(4)).unwrap(); assert_eq!(log_segment.checkpoint_parts[0].version, 3); assert_eq!(log_segment.ascending_commit_files.len(), 1); @@ -527,7 +698,7 @@ fn build_snapshot_with_start_checkpoint_and_time_travel_version() { } #[test] fn build_table_changes_with_commit_versions() { - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), @@ -547,7 +718,7 @@ fn build_table_changes_with_commit_versions() { ///////// Specify start version and end version ///////// let log_segment = - LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 2, 5).unwrap(); + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 2, 5).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -561,7 +732,7 @@ fn build_table_changes_with_commit_versions() { ///////// Start version and end version are the same ///////// let log_segment = - LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 0, Some(0)).unwrap(); + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 0, Some(0)).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -573,7 +744,7 @@ fn build_table_changes_with_commit_versions() { assert_eq!(commit_files[0].version, 0); ///////// Specify no start or end version ///////// - let log_segment = LogSegment::for_table_changes(client.as_ref(), log_root, 0, None).unwrap(); + let log_segment = LogSegment::for_table_changes(storage.as_ref(), log_root, 0, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -589,7 +760,7 @@ fn build_table_changes_with_commit_versions() { #[test] fn test_non_contiguous_log() { // Commit with version 1 is missing - let (client, log_root) = build_log_with_paths_and_checkpoint( + let (storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(2, "json"), @@ -597,26 +768,494 @@ fn test_non_contiguous_log() { None, ); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 0, None); + let log_segment_res = + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 0, None); assert!(log_segment_res.is_err()); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root.clone(), 1, None); + let log_segment_res = + LogSegment::for_table_changes(storage.as_ref(), log_root.clone(), 1, None); assert!(log_segment_res.is_err()); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root, 0, Some(1)); + let log_segment_res = LogSegment::for_table_changes(storage.as_ref(), log_root, 0, Some(1)); assert!(log_segment_res.is_err()); } #[test] fn table_changes_fails_with_larger_start_version_than_end() { // Commit with version 1 is missing - let (client, log_root) = build_log_with_paths_and_checkpoint( + let 
(storage, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), delta_path_for_version(1, "json"), ], None, ); - let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root, 1, Some(0)); + let log_segment_res = LogSegment::for_table_changes(storage.as_ref(), log_root, 1, Some(0)); assert!(log_segment_res.is_err()); } +#[test] +fn test_sidecar_to_filemeta_valid_paths() -> DeltaResult<()> { + let log_root = Url::parse("file:///var/_delta_log/")?; + let test_cases = [ + ( + "example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "file:///var/_delta_log/_sidecars/example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "test/test/example.parquet", + "file:///var/_delta_log/_sidecars/test/test/example.parquet", + ), + ]; + + for (input_path, expected_url) in test_cases.into_iter() { + let sidecar = Sidecar { + path: expected_url.to_string(), + modification_time: 0, + size_in_bytes: 1000, + tags: None, + }; + + let filemeta = sidecar.to_filemeta(&log_root)?; + assert_eq!( + filemeta.location.as_str(), + expected_url, + "Mismatch for input path: {}", + input_path + ); + } + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_no_sidecars_returns_none() -> DeltaResult<()> { + let (_, log_root) = new_in_memory_store(); + let engine = Arc::new(SyncEngine::new()); + let checkpoint_batch = add_batch_simple(get_log_schema().clone()); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? + .into_iter() + .flatten(); + + // Assert no batches are returned + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecars_returns_sidecar_batches() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(read_schema.clone()), + "sidecarfile2.parquet", + )?; + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + read_schema.clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + None, + )? + .into_iter() + .flatten(); + + // Assert the correctness of batches returned + assert_batch_matches(iter.next().unwrap()?, add_batch_simple(read_schema.clone())); + assert_batch_matches(iter.next().unwrap()?, add_batch_with_remove(read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecar_files_that_do_not_exist() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? 
+ .into_iter() + .flatten(); + + // Assert that an error is returned when trying to read sidecar files that do not exist + let err = iter.next().unwrap(); + assert!(err.is_err()); + + Ok(()) +} + +#[test] +fn test_reading_sidecar_files_with_predicate() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + let checkpoint_batch = + sidecar_batch_with_given_paths(vec!["sidecarfile1.parquet"], read_schema.clone()); + + // Add a sidecar file with only add actions + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + + // Filter out sidecar files that do not contain remove actions + let remove_predicate: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + + let mut iter = LogSegment::process_sidecars( + engine.parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + remove_predicate.clone(), + )? + .into_iter() + .flatten(); + + // As the sidecar batch contains only add actions, the batch should be filtered out + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_remove_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream( + &engine, + get_log_schema().project(&[REMOVE_NAME])?, + None, + ); + + // Errors because the schema has an REMOVE action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_add_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream(&engine, get_log_add_schema().clone(), None); + + // Errors because the schema has an ADD action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_no_file_actions( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + add_checkpoint_to_store( + &store, + // Create a checkpoint batch with sidecar actions to verify that the sidecar actions are not read. + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[METADATA_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], v2_checkpoint_read_schema), + ); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_multi_part( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + // Multi-part checkpoints should never contain sidecar actions. + // This test intentionally includes batches with sidecar actions in multi-part checkpoints + // to verify that the reader does not process them. Instead, the reader should short-circuit + // and return the checkpoint batches as-is when encountering a multi-part checkpoint. + // Note: This is a test-only scenario; real tables should never have multi-part + // checkpoints with sidecar actions. + let checkpoint_part_1 = "00000000000000000001.checkpoint.0000000001.0000000002.parquet"; + let checkpoint_part_2 = "00000000000000000001.checkpoint.0000000002.0000000002.parquet"; + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + checkpoint_part_1, + )?; + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar2.parquet"], get_log_schema().clone()), + checkpoint_part_2, + )?; + + let checkpoint_one_file = log_root.join(checkpoint_part_1)?.to_string(); + let checkpoint_two_file = log_root.join(checkpoint_part_2)?.to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![ + create_log_path(&checkpoint_one_file), + create_log_path(&checkpoint_two_file), + ], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert the correctness of batches returned + for expected_sidecar in ["sidecar1.parquet", "sidecar2.parquet"].iter() { + let (batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + batch, + sidecar_batch_with_given_paths( + vec![expected_sidecar], + v2_checkpoint_read_schema.clone(), + ), + ); + } + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars() -> DeltaResult<()> +{ + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + add_checkpoint_to_store( + &store, + add_batch_simple(get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches(first_batch, add_batch_simple(v2_checkpoint_read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + write_json_to_store( + &store, + vec![Action::Add(Add { + path: "fake_path_1".into(), + data_change: true, + ..Default::default() + })], + "00000000000000000001.checkpoint.json", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.json")? + .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema, None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + let mut visitor = AddVisitor::default(); + visitor.visit_rows_of(&*first_batch)?; + assert!(visitor.adds.len() == 1); + assert!(visitor.adds[0].path == "fake_path_1"); + + assert!(iter.next().is_none()); + + Ok(()) +} + +// Tests the end-to-end process of creating a checkpoint stream. +// Verifies that: +// - The checkpoint file is read and produces batches containing references to sidecar files. +// - As sidecar references are present, the corresponding sidecar files are processed correctly. +// - Batches from both the checkpoint file and sidecar files are returned. +// - Each returned batch is correctly flagged with is_log_batch set to false +#[test] +fn test_create_checkpoint_stream_reads_checkpoint_file_and_returns_sidecar_batches( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ), + "00000000000000000001.checkpoint.parquet", + )?; + + add_sidecar_to_store( + &store, + add_batch_simple(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile2.parquet", + )?; + + let checkpoint_file_path = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_file_path)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?, + ), + ); + // Assert that the second batch returned is from reading sidecarfile1 + let (second_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + second_batch, + add_batch_simple(v2_checkpoint_read_schema.clone()), + ); + + // Assert that the second batch returned is from reading sidecarfile2 + let (third_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + third_batch, + add_batch_with_remove(v2_checkpoint_read_schema), + ); + + assert!(iter.next().is_none()); + + Ok(()) +} diff --git a/kernel/src/parquet.rs b/kernel/src/parquet.rs new file mode 100644 index 000000000..362079290 --- /dev/null +++ b/kernel/src/parquet.rs @@ -0,0 +1,17 @@ +//! This module exists to help re-export the version of arrow used by default-engine and other +//! parts of kernel that need arrow + +#[cfg(feature = "arrow_53")] +pub use parquet_53::*; + +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] +pub use parquet_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/kernel/src/path.rs b/kernel/src/path.rs index 23e7819de..e2533d777 100644 --- a/kernel/src/path.rs +++ b/kernel/src/path.rs @@ -2,6 +2,7 @@ use std::str::FromStr; use url::Url; +use uuid::Uuid; use crate::{DeltaResult, Error, FileMeta, Version}; @@ -14,7 +15,7 @@ const MULTIPART_PART_LEN: usize = 10; /// The number of characters in the uuid part of a uuid checkpoint const UUID_PART_LEN: usize = 36; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] enum LogPathFileType { @@ -37,7 +38,7 @@ enum LogPathFileType { Unknown, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] struct ParsedLogPath { @@ -88,7 +89,7 @@ impl ParsedLogPath { let filename = url .path_segments() .ok_or_else(|| Error::invalid_log_path(url))? 
- .last() + .next_back() .unwrap() // "the iterator always contains at least one string (which may be empty)" .to_string(); if filename.is_empty() { @@ -163,10 +164,11 @@ impl ParsedLogPath { #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] fn is_checkpoint(&self) -> bool { - // TODO: Include UuidCheckpoint once we actually support v2 checkpoints matches!( self.file_type, - LogPathFileType::SinglePartCheckpoint | LogPathFileType::MultiPartCheckpoint { .. } + LogPathFileType::SinglePartCheckpoint + | LogPathFileType::MultiPartCheckpoint { .. } + | LogPathFileType::UuidCheckpoint(_) ) } @@ -174,24 +176,25 @@ impl ParsedLogPath { #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] #[allow(dead_code)] // currently only used in tests, which don't "count" fn is_unknown(&self) -> bool { - // TODO: Stop treating UuidCheckpoint as unknown once we support v2 checkpoints - matches!( - self.file_type, - LogPathFileType::Unknown | LogPathFileType::UuidCheckpoint(_) - ) + matches!(self.file_type, LogPathFileType::Unknown) } } impl ParsedLogPath { - /// Create a new ParsedCommitPath for a new json commit file at the specified version - pub(crate) fn new_commit( - table_root: &Url, - version: Version, - ) -> DeltaResult> { + const DELTA_LOG_DIR: &'static str = "_delta_log/"; + + /// Helper method to create a path with the given filename generator + fn create_path(table_root: &Url, filename: String) -> DeltaResult { + let location = table_root.join(Self::DELTA_LOG_DIR)?.join(&filename)?; + Self::try_from(location)?.ok_or_else(|| { + Error::internal_error(format!("Attempted to create an invalid path: {}", filename)) + }) + } + + /// Create a new ParsedCommitPath for a new json commit file + pub(crate) fn new_commit(table_root: &Url, version: Version) -> DeltaResult { let filename = format!("{:020}.json", version); - let location = table_root.join("_delta_log/")?.join(&filename)?; - let path = Self::try_from(location)? 
- .ok_or_else(|| Error::internal_error("attempted to create invalid commit path"))?; + let path = Self::create_path(table_root, filename)?; if !path.is_commit() { return Err(Error::internal_error( "ParsedLogPath::new_commit created a non-commit path", @@ -199,6 +202,38 @@ impl ParsedLogPath { } Ok(path) } + + /// Create a new ParsedCheckpointPath for a classic parquet checkpoint file + #[allow(dead_code)] // TODO: Remove this once we have a use case for it + pub(crate) fn new_classic_parquet_checkpoint( + table_root: &Url, + version: Version, + ) -> DeltaResult { + let filename = format!("{:020}.checkpoint.parquet", version); + let path = Self::create_path(table_root, filename)?; + if !path.is_checkpoint() { + return Err(Error::internal_error( + "ParsedLogPath::new_classic_parquet_checkpoint created a non-checkpoint path", + )); + } + Ok(path) + } + + /// Create a new ParsedCheckpointPath for a UUID-based parquet checkpoint file + #[allow(dead_code)] // TODO: Remove this once we have a use case for it + pub(crate) fn new_uuid_parquet_checkpoint( + table_root: &Url, + version: Version, + ) -> DeltaResult { + let filename = format!("{:020}.checkpoint.{}.parquet", version, Uuid::new_v4()); + let path = Self::create_path(table_root, filename)?; + if !path.is_checkpoint() { + return Err(Error::internal_error( + "ParsedLogPath::new_uuid_parquet_checkpoint created a non-checkpoint path", + )); + } + Ok(path) + } } #[cfg(test)] @@ -357,10 +392,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. - assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json") @@ -377,10 +409,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. 
- assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.foo") @@ -575,4 +604,42 @@ mod tests { assert!(matches!(log_path.file_type, LogPathFileType::Commit)); assert_eq!(log_path.filename, "00000000000000000010.json"); } + + #[test] + fn test_new_uuid_parquet_checkpoint() { + let table_log_dir = table_log_dir_url(); + let log_path = ParsedLogPath::new_uuid_parquet_checkpoint(&table_log_dir, 10).unwrap(); + + assert_eq!(log_path.version, 10); + assert!(log_path.is_checkpoint()); + assert_eq!(log_path.extension, "parquet"); + if let LogPathFileType::UuidCheckpoint(uuid) = &log_path.file_type { + assert_eq!(uuid.len(), UUID_PART_LEN); + } else { + panic!("Expected UuidCheckpoint file type"); + } + + let filename = log_path.filename.to_string(); + let filename_parts: Vec<&str> = filename.split('.').collect(); + assert_eq!(filename_parts.len(), 4); + assert_eq!(filename_parts[0], "00000000000000000010"); + assert_eq!(filename_parts[1], "checkpoint"); + assert_eq!(filename_parts[2].len(), UUID_PART_LEN); + assert_eq!(filename_parts[3], "parquet"); + } + + #[test] + fn test_new_classic_parquet_checkpoint() { + let table_log_dir = table_log_dir_url(); + let log_path = ParsedLogPath::new_classic_parquet_checkpoint(&table_log_dir, 10).unwrap(); + + assert_eq!(log_path.version, 10); + assert!(log_path.is_checkpoint()); + assert_eq!(log_path.extension, "parquet"); + assert!(matches!( + log_path.file_type, + LogPathFileType::SinglePartCheckpoint + )); + assert_eq!(log_path.filename, "00000000000000000010.checkpoint.parquet"); + } } diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs index 11181863d..c6897453b 100644 --- a/kernel/src/scan/data_skipping.rs +++ b/kernel/src/scan/data_skipping.rs @@ -11,8 +11,8 @@ use crate::expressions::{ column_expr, joined_column_expr, BinaryOperator, ColumnName, Expression as Expr, ExpressionRef, Scalar, VariadicOperator, }; -use crate::predicates::{ - DataSkippingPredicateEvaluator, PredicateEvaluator, PredicateEvaluatorDefaults, +use crate::kernel_predicates::{ + DataSkippingPredicateEvaluator, KernelPredicateEvaluator, KernelPredicateEvaluatorDefaults, }; use crate::schema::{DataType, PrimitiveType, SchemaRef, SchemaTransform, StructField, StructType}; use crate::{Engine, EngineData, ExpressionEvaluator, JsonHandler, RowVisitor as _}; @@ -33,9 +33,9 @@ mod tests; /// /// The variadic operations are rewritten as follows: /// - `AND` is rewritten as a conjunction of the rewritten operands where we just skip operands that -/// are not eligible for data skipping. +/// are not eligible for data skipping. /// - `OR` is rewritten only if all operands are eligible for data skipping. Otherwise, the whole OR -/// expression is dropped. +/// expression is dropped. #[cfg(test)] fn as_data_skipping_predicate(expr: &Expr) -> Option { DataSkippingPredicateCreator.eval(expr) @@ -75,6 +75,28 @@ impl DataSkippingFilter { let (predicate, referenced_schema) = physical_predicate?; debug!("Creating a data skipping filter for {:#?}", predicate); + // Convert all fields into nullable, as stats may not be available for all columns + // (and usually aren't for partition columns). 
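+        // For example, a referenced (physical) schema of `{ x: INTEGER NOT NULL }` yields a stats
+        // read schema of roughly `{ numRecords: LONG?, nullCount: { x: LONG? },
+        // minValues: { x: INTEGER? }, maxValues: { x: INTEGER? } }` (see stats_schema below).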
+ struct NullableStatsTransform; + impl<'a> SchemaTransform<'a> for NullableStatsTransform { + fn transform_struct_field( + &mut self, + field: &'a StructField, + ) -> Option> { + use Cow::*; + let field = match self.transform(&field.data_type)? { + Borrowed(_) if field.is_nullable() => Borrowed(field), + data_type => Owned(StructField { + name: field.name.clone(), + data_type: data_type.into_owned(), + nullable: true, + metadata: field.metadata.clone(), + }), + }; + Some(field) + } + } + // Convert a min/max stats schema into a nullcount schema (all leaf fields are LONG) struct NullCountStatsTransform; impl<'a> SchemaTransform<'a> for NullCountStatsTransform { @@ -85,14 +107,19 @@ impl DataSkippingFilter { Some(Cow::Owned(PrimitiveType::Long)) } } - let nullcount_schema = NullCountStatsTransform + + let stats_schema = NullableStatsTransform .transform_struct(&referenced_schema)? .into_owned(); + + let nullcount_schema = NullCountStatsTransform + .transform_struct(&stats_schema)? + .into_owned(); let stats_schema = Arc::new(StructType::new([ StructField::nullable("numRecords", DataType::LONG), StructField::nullable("nullCount", nullcount_schema), - StructField::nullable("minValues", referenced_schema.clone()), - StructField::nullable("maxValues", referenced_schema), + StructField::nullable("minValues", stats_schema.clone()), + StructField::nullable("maxValues", stats_schema), ])); // Skipping happens in several steps: @@ -106,20 +133,20 @@ impl DataSkippingFilter { // // 3. The selection evaluator does DISTINCT(col(predicate), 'false') to produce true (= keep) when // the predicate is true/null and false (= skip) when the predicate is false. - let select_stats_evaluator = engine.get_expression_handler().get_evaluator( + let select_stats_evaluator = engine.evaluation_handler().new_expression_evaluator( // safety: kernel is very broken if we don't have the schema for Add actions get_log_add_schema().clone(), STATS_EXPR.clone(), DataType::STRING, ); - let skipping_evaluator = engine.get_expression_handler().get_evaluator( + let skipping_evaluator = engine.evaluation_handler().new_expression_evaluator( stats_schema.clone(), Expr::struct_from([as_sql_data_skipping_predicate(&predicate)?]), PREDICATE_SCHEMA.clone(), ); - let filter_evaluator = engine.get_expression_handler().get_evaluator( + let filter_evaluator = engine.evaluation_handler().new_expression_evaluator( stats_schema.clone(), FILTER_EXPR.clone(), DataType::BOOLEAN, @@ -130,7 +157,7 @@ impl DataSkippingFilter { select_stats_evaluator, skipping_evaluator, filter_evaluator, - json_handler: engine.get_json_handler(), + json_handler: engine.json_handler(), }) } @@ -213,11 +240,11 @@ impl DataSkippingPredicateEvaluator for DataSkippingPredicateCreator { } fn eval_scalar_is_null(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted).map(Expr::literal) + KernelPredicateEvaluatorDefaults::eval_scalar_is_null(val, inverted).map(Expr::literal) } fn eval_scalar(&self, val: &Scalar, inverted: bool) -> Option { - PredicateEvaluatorDefaults::eval_scalar(val, inverted).map(Expr::literal) + KernelPredicateEvaluatorDefaults::eval_scalar(val, inverted).map(Expr::literal) } fn eval_is_null(&self, col: &ColumnName, inverted: bool) -> Option { @@ -235,7 +262,7 @@ impl DataSkippingPredicateEvaluator for DataSkippingPredicateCreator { right: &Scalar, inverted: bool, ) -> Option { - PredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) + 
KernelPredicateEvaluatorDefaults::eval_binary_scalars(op, left, right, inverted) .map(Expr::literal) } diff --git a/kernel/src/scan/data_skipping/tests.rs b/kernel/src/scan/data_skipping/tests.rs index 2ca7a0c01..4cac4e64a 100644 --- a/kernel/src/scan/data_skipping/tests.rs +++ b/kernel/src/scan/data_skipping/tests.rs @@ -1,7 +1,7 @@ use super::*; use crate::expressions::column_name; -use crate::predicates::{DefaultPredicateEvaluator, UnimplementedColumnResolver}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, UnimplementedColumnResolver}; use std::collections::HashMap; const TRUE: Option = Some(true); @@ -32,7 +32,7 @@ fn test_eval_is_null() { (column_name!("numRecords"), Scalar::from(2i64)), (column_name!("nullCount.x"), Scalar::from(nullcount)), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -75,7 +75,7 @@ fn test_eval_binary_comparisons() { (column_name!("minValues.x"), min.clone()), (column_name!("maxValues.x"), max.clone()), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected.iter()) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -149,7 +149,7 @@ fn test_eval_variadic() { (&[NULL, TRUE, FALSE], FALSE, TRUE), (&[NULL, FALSE, TRUE], FALSE, TRUE), ]; - let filter = DefaultPredicateEvaluator::from(UnimplementedColumnResolver); + let filter = DefaultKernelPredicateEvaluator::from(UnimplementedColumnResolver); for (inputs, expect_and, expect_or) in test_cases { let inputs: Vec<_> = inputs .iter() @@ -214,7 +214,7 @@ fn test_eval_distinct() { (column_name!("minValues.x"), min.clone()), (column_name!("maxValues.x"), max.clone()), ]); - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); for (expr, expect) in expressions.iter().zip(expected) { let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( @@ -286,7 +286,7 @@ fn test_sql_where() { (column_name!("maxValues.x"), max.clone()), ]) }; - let filter = DefaultPredicateEvaluator::from(resolver); + let filter = DefaultKernelPredicateEvaluator::from(resolver); let pred = as_data_skipping_predicate(expr).unwrap(); expect_eq!( filter.eval_expr(&pred, false), diff --git a/kernel/src/scan/log_replay.rs b/kernel/src/scan/log_replay.rs index 177996a80..4ae6d28a5 100644 --- a/kernel/src/scan/log_replay.rs +++ b/kernel/src/scan/log_replay.rs @@ -3,39 +3,74 @@ use std::collections::{HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use itertools::Itertools; -use tracing::debug; use super::data_skipping::DataSkippingFilter; -use super::{ScanData, Transform}; +use super::{ScanMetadata, Transform}; use crate::actions::get_log_add_schema; use crate::engine_data::{GetData, RowVisitor, TypedGetData as _}; use crate::expressions::{column_expr, column_name, ColumnName, Expression, ExpressionRef}; -use crate::scan::{DeletionVectorDescriptor, TransformExpr}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, KernelPredicateEvaluator as _}; +use crate::log_replay::{FileActionDeduplicator, FileActionKey, LogReplayProcessor}; +use crate::scan::{Scalar, TransformExpr}; use crate::schema::{ColumnNamesAndTypes, DataType, MapType, SchemaRef, StructField, StructType}; use crate::utils::require; use 
crate::{DeltaResult, Engine, EngineData, Error, ExpressionEvaluator}; -/// The subset of file action fields that uniquely identifies it in the log, used for deduplication -/// of adds and removes during log replay. -#[derive(Debug, Hash, Eq, PartialEq)] -struct FileActionKey { - path: String, - dv_unique_id: Option, -} -impl FileActionKey { - fn new(path: impl Into, dv_unique_id: Option) -> Self { - let path = path.into(); - Self { path, dv_unique_id } - } -} - -struct LogReplayScanner { - filter: Option, - +/// [`ScanLogReplayProcessor`] performs log replay (processes actions) specifically for doing a table scan. +/// +/// During a table scan, the processor reads batches of log actions (in reverse chronological order) +/// and performs the following steps: +/// +/// - Data Skipping: Applies a predicate-based filter (via [`DataSkippingFilter`]) to quickly skip +/// files that are irrelevant for the query. +/// - Partition Pruning: Uses an optional partition filter (extracted from a physical predicate) +/// to exclude actions whose partition values do not meet the required criteria. +/// - Action Deduplication: Leverages the [`FileActionDeduplicator`] to ensure that for each unique file +/// (identified by its path and deletion vector unique ID), only the latest valid Add action is processed. +/// - Transformation: Applies a built-in transformation (`add_transform`) to convert selected Add actions +/// into [`ScanMetadata`], the intermediate format passed to the engine. +/// - Row Transform Passthrough: Any user-provided row-level transformation expressions (e.g. those derived +/// from projection or filters) are preserved and passed through to the engine, which applies them as part +/// of its scan execution logic. +/// +/// As an implementation of [`LogReplayProcessor`], [`ScanLogReplayProcessor`] provides the +/// `process_actions_batch` method, which applies these steps to each batch of log actions and +/// produces a [`ScanMetadata`] result. This result includes the transformed batch, a selection +/// vector indicating which rows are valid, and any row-level transformation expressions that need +/// to be applied to the selected rows. +struct ScanLogReplayProcessor { + partition_filter: Option, + data_skipping_filter: Option, + add_transform: Arc, + logical_schema: SchemaRef, + transform: Option>, /// A set of (data file path, dv_unique_id) pairs that have been seen thus /// far in the log. This is used to filter out files with Remove actions as /// well as duplicate entries in the log. - seen: HashSet, + seen_file_keys: HashSet, +} + +impl ScanLogReplayProcessor { + /// Create a new [`ScanLogReplayProcessor`] instance + fn new( + engine: &dyn Engine, + physical_predicate: Option<(ExpressionRef, SchemaRef)>, + logical_schema: SchemaRef, + transform: Option>, + ) -> Self { + Self { + partition_filter: physical_predicate.as_ref().map(|(e, _)| e.clone()), + data_skipping_filter: DataSkippingFilter::new(engine, physical_predicate), + add_transform: engine.evaluation_handler().new_expression_evaluator( + get_log_add_schema().clone(), + get_add_transform_expr(), + SCAN_ROW_DATATYPE.clone(), + ), + seen_file_keys: Default::default(), + logical_schema, + transform, + } + } } /// A visitor that deduplicates a stream of add and remove actions into a stream of valid adds. Log @@ -43,68 +78,96 @@ struct LogReplayScanner { /// pair, we should ignore all subsequent (older) actions for that same (path, dvId) pair. 
If the /// first action for a given file is a remove, then that file does not show up in the result at all. struct AddRemoveDedupVisitor<'seen> { - seen: &'seen mut HashSet, + deduplicator: FileActionDeduplicator<'seen>, selection_vector: Vec, logical_schema: SchemaRef, transform: Option>, + partition_filter: Option, row_transform_exprs: Vec>, - is_log_batch: bool, } impl AddRemoveDedupVisitor<'_> { - /// Checks if log replay already processed this logical file (in which case the current action - /// should be ignored). If not already seen, register it so we can recognize future duplicates. - /// Returns `true` if we have seen the file and should ignore it, `false` if we have not seen it - /// and should process it. - fn check_and_record_seen(&mut self, key: FileActionKey) -> bool { - // Note: each (add.path + add.dv_unique_id()) pair has a - // unique Add + Remove pair in the log. For example: - // https://github.com/delta-io/delta/blob/master/spark/src/test/resources/delta/table-with-dv-large/_delta_log/00000000000000000001.json - - if self.seen.contains(&key) { - debug!( - "Ignoring duplicate ({}, {:?}) in scan, is log {}", - key.path, key.dv_unique_id, self.is_log_batch - ); - true - } else { - debug!( - "Including ({}, {:?}) in scan, is log {}", - key.path, key.dv_unique_id, self.is_log_batch - ); - if self.is_log_batch { - // Remember file actions from this batch so we can ignore duplicates as we process - // batches from older commit and/or checkpoint files. We don't track checkpoint - // batches because they are already the oldest actions and never replace anything. - self.seen.insert(key); - } - false + // These index positions correspond to the order of columns defined in + // `selected_column_names_and_types()` + const ADD_PATH_INDEX: usize = 0; // Position of "add.path" in getters + const ADD_PARTITION_VALUES_INDEX: usize = 1; // Position of "add.partitionValues" in getters + const ADD_DV_START_INDEX: usize = 2; // Start position of add deletion vector columns + const REMOVE_PATH_INDEX: usize = 5; // Position of "remove.path" in getters + const REMOVE_DV_START_INDEX: usize = 6; // Start position of remove deletion vector columns + + fn new( + seen: &mut HashSet, + selection_vector: Vec, + logical_schema: SchemaRef, + transform: Option>, + partition_filter: Option, + is_log_batch: bool, + ) -> AddRemoveDedupVisitor<'_> { + AddRemoveDedupVisitor { + deduplicator: FileActionDeduplicator::new( + seen, + is_log_batch, + Self::ADD_PATH_INDEX, + Self::REMOVE_PATH_INDEX, + Self::ADD_DV_START_INDEX, + Self::REMOVE_DV_START_INDEX, + ), + selection_vector, + logical_schema, + transform, + partition_filter, + row_transform_exprs: Vec::new(), } } + fn parse_partition_value( + &self, + field_idx: usize, + partition_values: &HashMap, + ) -> DeltaResult<(usize, (String, Scalar))> { + let field = self.logical_schema.fields.get_index(field_idx); + let Some((_, field)) = field else { + return Err(Error::InternalError(format!( + "out of bounds partition column field index {field_idx}" + ))); + }; + let name = field.physical_name(); + let partition_value = + super::parse_partition_value(partition_values.get(name), field.data_type())?; + Ok((field_idx, (name.to_string(), partition_value))) + } + + fn parse_partition_values( + &self, + transform: &Transform, + partition_values: &HashMap, + ) -> DeltaResult> { + transform + .iter() + .filter_map(|transform_expr| match transform_expr { + TransformExpr::Partition(field_idx) => { + Some(self.parse_partition_value(*field_idx, partition_values)) + } + 
TransformExpr::Static(_) => None, + }) + .try_collect() + } + /// Compute an expression that will transform from physical to logical for a given Add file action - fn get_transform_expr<'a>( + fn get_transform_expr( &self, - i: usize, transform: &Transform, - getters: &[&'a dyn GetData<'a>], + mut partition_values: HashMap, ) -> DeltaResult { - let partition_values: HashMap<_, _> = getters[1].get(i, "add.partitionValues")?; let transforms = transform .iter() .map(|transform_expr| match transform_expr { TransformExpr::Partition(field_idx) => { - let field = self.logical_schema.fields.get_index(*field_idx); - let Some((_, field)) = field else { - return Err(Error::Generic( - format!("logical schema did not contain expected field at {field_idx}, can't transform data") - )); + let Some((_, partition_value)) = partition_values.remove(field_idx) else { + return Err(Error::InternalError(format!( + "missing partition value for field index {field_idx}" + ))); }; - let name = field.physical_name(); - let partition_value = super::parse_partition_value( - partition_values.get(name), - field.data_type(), - )?; Ok(partition_value.into()) } TransformExpr::Static(field_expr) => Ok(field_expr.clone()), @@ -113,40 +176,69 @@ impl AddRemoveDedupVisitor<'_> { Ok(Arc::new(Expression::Struct(transforms))) } + fn is_file_partition_pruned( + &self, + partition_values: &HashMap, + ) -> bool { + if partition_values.is_empty() { + return false; + } + let Some(partition_filter) = &self.partition_filter else { + return false; + }; + let partition_values: HashMap<_, _> = partition_values + .values() + .map(|(k, v)| (ColumnName::new([k]), v.clone())) + .collect(); + let evaluator = DefaultKernelPredicateEvaluator::from(partition_values); + evaluator.eval_sql_where(partition_filter) == Some(false) + } + /// True if this row contains an Add action that should survive log replay. Skip it if the row /// is not an Add action, or the file has already been seen previously. fn is_valid_add<'a>(&mut self, i: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult { - // Add will have a path at index 0 if it is valid; otherwise, if it is a log batch, we may - // have a remove with a path at index 4. In either case, extract the three dv getters at - // indexes that immediately follow a valid path index. - let (path, dv_getters, is_add) = if let Some(path) = getters[0].get_str(i, "add.path")? { - (path, &getters[2..5], true) - } else if !self.is_log_batch { - return Ok(false); - } else if let Some(path) = getters[5].get_opt(i, "remove.path")? { - (path, &getters[6..9], false) - } else { + // When processing file actions, we extract path and deletion vector information based on action type: + // - For Add actions: path is at index 0, followed by DV fields at indexes 2-4 + // - For Remove actions (in log batches only): path is at index 5, followed by DV fields at indexes 6-8 + // The file extraction logic selects the appropriate indexes based on whether we found a valid path. + // Remove getters are not included when visiting a non-log batch (checkpoint batch), so do + // not try to extract remove actions in that case. + let Some((file_key, is_add)) = self.deduplicator.extract_file_action( + i, + getters, + !self.deduplicator.is_log_batch(), // skip_removes. true if this is a checkpoint batch + )? + else { return Ok(false); }; - let dv_unique_id = match dv_getters[0].get_opt(i, "deletionVector.storageType")? 
{ - Some(storage_type) => Some(DeletionVectorDescriptor::unique_id_from_parts( - storage_type, - dv_getters[1].get(i, "deletionVector.pathOrInlineDv")?, - dv_getters[2].get_opt(i, "deletionVector.offset")?, - )), - None => None, + // Apply partition pruning (to adds only) before deduplication, so that we don't waste memory + // tracking pruned files. Removes don't get pruned and we'll still have to track them. + // + // WARNING: It's not safe to partition-prune removes (just like it's not safe to data skip + // removes), because they are needed to suppress earlier incompatible adds we might + // encounter if the table's schema was replaced after the most recent checkpoint. + let partition_values = match &self.transform { + Some(transform) if is_add => { + let partition_values = + getters[Self::ADD_PARTITION_VALUES_INDEX].get(i, "add.partitionValues")?; + let partition_values = self.parse_partition_values(transform, &partition_values)?; + if self.is_file_partition_pruned(&partition_values) { + return Ok(false); + } + partition_values + } + _ => Default::default(), }; // Check both adds and removes (skipping already-seen), but only transform and return adds - let file_key = FileActionKey::new(path, dv_unique_id); - if self.check_and_record_seen(file_key) || !is_add { + if self.deduplicator.check_and_record_seen(file_key) || !is_add { return Ok(false); } let transform = self .transform .as_ref() - .map(|transform| self.get_transform_expr(i, transform, getters)) + .map(|transform| self.get_transform_expr(transform, partition_values)) .transpose()?; if transform.is_some() { // fill in any needed `None`s for previous rows @@ -179,7 +271,7 @@ impl RowVisitor for AddRemoveDedupVisitor<'_> { (names, types).into() }); let (names, types) = NAMES_AND_TYPES.as_ref(); - if self.is_log_batch { + if self.deduplicator.is_log_batch() { (names, types) } else { // All checkpoint actions are already reconciled and Remove actions in checkpoint files @@ -189,7 +281,8 @@ impl RowVisitor for AddRemoveDedupVisitor<'_> { } fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()> { - let expected_getters = if self.is_log_batch { 9 } else { 5 }; + let is_log_batch = self.deduplicator.is_log_batch(); + let expected_getters = if is_log_batch { 9 } else { 5 }; require!( getters.len() == expected_getters, Error::InternalError(format!( @@ -246,45 +339,41 @@ fn get_add_transform_expr() -> Expression { ]) } -impl LogReplayScanner { - /// Create a new [`LogReplayScanner`] instance - fn new(engine: &dyn Engine, physical_predicate: Option<(ExpressionRef, SchemaRef)>) -> Self { - Self { - filter: DataSkippingFilter::new(engine, physical_predicate), - seen: Default::default(), - } - } +impl LogReplayProcessor for ScanLogReplayProcessor { + type Output = ScanMetadata; - fn process_scan_batch( + fn process_actions_batch( &mut self, - add_transform: &dyn ExpressionEvaluator, - actions: &dyn EngineData, - logical_schema: SchemaRef, - transform: Option>, + actions_batch: &dyn EngineData, is_log_batch: bool, - ) -> DeltaResult { - // Apply data skipping to get back a selection vector for actions that passed skipping. We - // will update the vector below as log replay identifies duplicates that should be ignored. 
- let selection_vector = match &self.filter { - Some(filter) => filter.apply(actions)?, - None => vec![true; actions.len()], - }; - assert_eq!(selection_vector.len(), actions.len()); - - let mut visitor = AddRemoveDedupVisitor { - seen: &mut self.seen, + ) -> DeltaResult { + // Build an initial selection vector for the batch which has had the data skipping filter + // applied. The selection vector is further updated by the deduplication visitor to remove + // rows that are not valid adds. + let selection_vector = self.build_selection_vector(actions_batch)?; + assert_eq!(selection_vector.len(), actions_batch.len()); + + let mut visitor = AddRemoveDedupVisitor::new( + &mut self.seen_file_keys, selection_vector, - logical_schema, - transform, - row_transform_exprs: Vec::new(), + self.logical_schema.clone(), + self.transform.clone(), + self.partition_filter.clone(), is_log_batch, - }; - visitor.visit_rows_of(actions)?; + ); + visitor.visit_rows_of(actions_batch)?; // TODO: Teach expression eval to respect the selection vector we just computed so carefully! - let selection_vector = visitor.selection_vector; - let result = add_transform.evaluate(actions)?; - Ok((result, selection_vector, visitor.row_transform_exprs)) + let result = self.add_transform.evaluate(actions_batch)?; + Ok(ScanMetadata::new( + result, + visitor.selection_vector, + visitor.row_transform_exprs, + )) + } + + fn data_skipping_filter(&self) -> Option<&DataSkippingFilter> { + self.data_skipping_filter.as_ref() } } @@ -298,31 +387,16 @@ pub(crate) fn scan_action_iter( logical_schema: SchemaRef, transform: Option>, physical_predicate: Option<(ExpressionRef, SchemaRef)>, -) -> impl Iterator> { - let mut log_scanner = LogReplayScanner::new(engine, physical_predicate); - let add_transform = engine.get_expression_handler().get_evaluator( - get_log_add_schema().clone(), - get_add_transform_expr(), - SCAN_ROW_DATATYPE.clone(), - ); - action_iter - .map(move |action_res| { - let (batch, is_log_batch) = action_res?; - log_scanner.process_scan_batch( - add_transform.as_ref(), - batch.as_ref(), - logical_schema.clone(), - transform.clone(), - is_log_batch, - ) - }) - .filter(|res| res.as_ref().map_or(true, |(_, sv, _)| sv.contains(&true))) +) -> impl Iterator> { + ScanLogReplayProcessor::new(engine, physical_predicate, logical_schema, transform) + .process_actions_iter(action_iter) } #[cfg(test)] mod tests { use std::{collections::HashMap, sync::Arc}; + use crate::actions::get_log_schema; use crate::expressions::{column_name, Scalar}; use crate::scan::state::{DvInfo, Stats}; use crate::scan::test_utils::{ @@ -364,7 +438,7 @@ mod tests { #[test] fn test_scan_action_iter() { run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], @@ -376,7 +450,7 @@ mod tests { #[test] fn test_scan_action_iter_with_remove() { run_with_validate_callback( - vec![add_batch_with_remove()], + vec![add_batch_with_remove(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[false, false, true, false], @@ -387,7 +461,7 @@ mod tests { #[test] fn test_no_transforms() { - let batch = vec![add_batch_simple()]; + let batch = vec![add_batch_simple(get_log_schema().clone())]; let logical_schema = Arc::new(crate::schema::StructType::new(vec![])); let iter = scan_action_iter( &SyncEngine::new(), @@ -397,8 +471,11 @@ mod tests { None, ); for res in iter { - let (_batch, _sel, transforms) = res.unwrap(); - 
assert!(transforms.is_empty(), "Should have no transforms"); + let scan_metadata = res.unwrap(); + assert!( + scan_metadata.scan_file_transforms.is_empty(), + "Should have no transforms" + ); } } @@ -443,7 +520,8 @@ mod tests { } for res in iter { - let (_batch, _sel, transforms) = res.unwrap(); + let scan_metadata = res.unwrap(); + let transforms = scan_metadata.scan_file_transforms; // in this case we have a metadata action first and protocol 3rd, so we expect 4 items, // the first and 3rd being a `None` assert_eq!(transforms.len(), 4, "Should have 4 transforms"); diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 14e2ee50f..1a9412089 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -11,9 +11,11 @@ use url::Url; use crate::actions::deletion_vector::{ deletion_treemap_to_bools, split_vector, DeletionVectorDescriptor, }; -use crate::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use crate::actions::{get_log_schema, ADD_NAME, REMOVE_NAME, SIDECAR_NAME}; +use crate::engine_data::FilteredEngineData; use crate::expressions::{ColumnName, Expression, ExpressionRef, ExpressionTransform, Scalar}; -use crate::predicates::{DefaultPredicateEvaluator, EmptyColumnResolver}; +use crate::kernel_predicates::{DefaultKernelPredicateEvaluator, EmptyColumnResolver}; +use crate::log_replay::HasSelectionVector; use crate::scan::state::{DvInfo, Stats}; use crate::schema::{ ArrayType, DataType, MapType, PrimitiveType, Schema, SchemaRef, SchemaTransform, StructField, @@ -96,9 +98,7 @@ impl ScanBuilder { /// perform actual data reads. pub fn build(self) -> DeltaResult { // if no schema is provided, use snapshot's entire schema (e.g. SELECT *) - let logical_schema = self - .schema - .unwrap_or_else(|| self.snapshot.schema().clone().into()); + let logical_schema = self.schema.unwrap_or_else(|| self.snapshot.schema()); let state_info = get_state_info( logical_schema.as_ref(), &self.snapshot.metadata().partition_columns, @@ -184,8 +184,9 @@ impl PhysicalPredicate { // the predicate allows to statically skip all files. Since this is direct evaluation (not an // expression rewrite), we use a `DefaultPredicateEvaluator` with an empty column resolver. fn can_statically_skip_all_files(predicate: &Expression) -> bool { - use crate::predicates::PredicateEvaluator as _; - DefaultPredicateEvaluator::from(EmptyColumnResolver).eval_sql_where(predicate) == Some(false) + use crate::kernel_predicates::KernelPredicateEvaluator as _; + DefaultKernelPredicateEvaluator::from(EmptyColumnResolver).eval_sql_where(predicate) + == Some(false) } // Build the stats read schema filtering the table schema to keep only skipping-eligible @@ -322,9 +323,48 @@ pub(crate) enum TransformExpr { Partition(usize), } -// TODO(nick): Make this a struct in a follow-on PR -// (data, deletion_vec, transforms) -pub type ScanData = (Box, Vec, Vec>); +/// [`ScanMetadata`] contains (1) a batch of [`FilteredEngineData`] specifying data files to be scanned +/// and (2) a vector of transforms (one transform per scan file) that must be applied to the data read +/// from those files. +pub struct ScanMetadata { + /// Filtered engine data with one row per file to scan (and only selected rows should be scanned) + pub scan_files: FilteredEngineData, + + /// Row-level transformations to apply to data read from files. + /// + /// Each entry in this vector corresponds to a row in the `scan_files` data. 
The entry is an + /// optional expression that must be applied to convert the file's data into the logical schema + /// expected by the scan: + /// + /// - `Some(expr)`: Apply this expression to transform the data to match [`Scan::schema()`]. + /// - `None`: No transformation is needed; the data is already in the correct logical form. + /// + /// Note: This vector can be indexed by row number, as rows masked by the selection vector will + /// have corresponding entries that will be `None`. + pub scan_file_transforms: Vec>, +} + +impl ScanMetadata { + fn new( + data: Box, + selection_vector: Vec, + scan_file_transforms: Vec>, + ) -> Self { + Self { + scan_files: FilteredEngineData { + data, + selection_vector, + }, + scan_file_transforms, + } + } +} + +impl HasSelectionVector for ScanMetadata { + fn has_selected_rows(&self) -> bool { + self.scan_files.selection_vector.contains(&true) + } +} /// The result of building a scan over a table. This can be used to get the actual data from /// scanning the table. @@ -378,9 +418,9 @@ impl Scan { .collect() } - /// Get an iterator of [`EngineData`]s that should be included in scan for a query. This handles - /// log-replay, reconciling Add and Remove actions, and applying data skipping (if - /// possible). Each item in the returned iterator is a tuple of: + /// Get an iterator of [`ScanMetadata`]s that should be used to facilitate a scan. This handles + /// log-replay, reconciling Add and Remove actions, and applying data skipping (if possible). + /// Each item in the returned iterator is a struct of: /// - `Box`: Data in engine format, where each row represents a file to be /// scanned. The schema for each row can be obtained by calling [`scan_row_schema`]. /// - `Vec`: A selection vector. If a row is at index `i` and this vector is `false` at @@ -391,22 +431,22 @@ impl Scan { /// `filter_record_batch`, you _need_ to extend this vector to the full length of the batch or /// arrow will drop the extra rows. /// - `Vec>`: Transformation expressions that need to be applied. For each - /// row at index `i` in the above data, if an expression exists at index `i` in the `Vec`, - /// the associated expression _must_ be applied to the data read from the file specified by - /// the row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If - /// the item at index `i` in this `Vec` is `None`, or if the `Vec` contains fewer than `i` - /// elements, no expression need be applied and the data read from disk is already in the - /// correct logical state. - pub fn scan_data( + /// row at index `i` in the above data, if an expression exists at index `i` in the `Vec`, + /// the associated expression _must_ be applied to the data read from the file specified by + /// the row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If + /// the item at index `i` in this `Vec` is `None`, or if the `Vec` contains fewer than `i` + /// elements, no expression need be applied and the data read from disk is already in the + /// correct logical state. + pub fn scan_metadata( &self, engine: &dyn Engine, - ) -> DeltaResult>> { + ) -> DeltaResult>> { // Compute the static part of the transformation. 
This is `None` if no transformation is // needed (currently just means no partition cols AND no column mapping but will be extended // for other transforms as we support them) let static_transform = (self.have_partition_cols || self.snapshot.column_mapping_mode() != ColumnMappingMode::None) - .then_some(Arc::new(Scan::get_static_transform(&self.all_fields))); + .then(|| Arc::new(Scan::get_static_transform(&self.all_fields))); let physical_predicate = match self.physical_predicate.clone() { PhysicalPredicate::StaticSkipAll => return Ok(None.into_iter().flatten()), PhysicalPredicate::Some(predicate, schema) => Some((predicate, schema)), @@ -414,7 +454,7 @@ impl Scan { }; let it = scan_action_iter( engine, - self.replay_for_scan_data(engine)?, + self.replay_for_scan_metadata(engine)?, self.logical_schema.clone(), static_transform, physical_predicate, @@ -423,18 +463,21 @@ impl Scan { } // Factored out to facilitate testing - fn replay_for_scan_data( + fn replay_for_scan_metadata( &self, engine: &dyn Engine, ) -> DeltaResult, bool)>> + Send> { let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); + let checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; // NOTE: We don't pass any meta-predicate because we expect no meaningful row group skipping // when ~every checkpoint file will contain the adds and removes we are looking for. - self.snapshot - .log_segment() - .replay(engine, commit_read_schema, checkpoint_read_schema, None) + self.snapshot.log_segment().read_actions( + engine, + commit_read_schema, + checkpoint_read_schema, + None, + ) } /// Get global state that is valid for the entire scan. This is somewhat expensive so should @@ -448,14 +491,14 @@ impl Scan { } } - /// Perform an "all in one" scan. This will use the provided `engine` to read and - /// process all the data for the query. Each [`ScanResult`] in the resultant iterator encapsulates - /// the raw data and an optional boolean vector built from the deletion vector if it was - /// present. See the documentation for [`ScanResult`] for more details. Generally - /// connectors/engines will want to use [`Scan::scan_data`] so they can have more control over - /// the execution of the scan. - // This calls [`Scan::scan_data`] to get an iterator of `ScanData` actions for the scan, and then uses the - // `engine`'s [`crate::ParquetHandler`] to read the actual table data. + /// Perform an "all in one" scan. This will use the provided `engine` to read and process all + /// the data for the query. Each [`ScanResult`] in the resultant iterator encapsulates the raw + /// data and an optional boolean vector built from the deletion vector if it was present. See + /// the documentation for [`ScanResult`] for more details. Generally connectors/engines will + /// want to use [`Scan::scan_metadata`] so they can have more control over the execution of the + /// scan. + // This calls [`Scan::scan_metadata`] to get an iterator of `ScanMetadata` actions for the scan, + // and then uses the `engine`'s [`crate::ParquetHandler`] to read the actual table data. 
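// Illustrative sketch (not a signature from this patch): how a connector might drive the renamed
// `scan_metadata` / `visit_scan_files` API directly instead of calling `execute`. The names
// `my_engine` and `collect_path` are hypothetical; `collect_path` stands in for a `ScanCallback`
// (see kernel/src/scan/state.rs) that pushes each file `path` into the context vector. The visitor
// honors `scan_metadata.scan_files.selection_vector`, so rows masked out there are skipped.
//
// ```ignore
// let scan = snapshot.into_scan_builder().build()?;
// let mut paths: Vec<String> = vec![];
// for res in scan.scan_metadata(my_engine.as_ref())? {
//     let scan_metadata = res?;
//     paths = scan_metadata.visit_scan_files(paths, collect_path)?;
// }
// ```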
pub fn execute( &self, engine: Arc, @@ -466,7 +509,7 @@ impl Scan { dv_info: DvInfo, transform: Option, } - fn scan_data_callback( + fn scan_metadata_callback( batches: &mut Vec, path: &str, size: i64, @@ -492,18 +535,12 @@ impl Scan { let table_root = self.snapshot.table_root().clone(); let physical_predicate = self.physical_predicate(); - let scan_data = self.scan_data(engine.as_ref())?; - let scan_files_iter = scan_data + let scan_metadata_iter = self.scan_metadata(engine.as_ref())?; + let scan_files_iter = scan_metadata_iter .map(|res| { - let (data, vec, transforms) = res?; + let scan_metadata = res?; let scan_files = vec![]; - state::visit_scan_files( - data.as_ref(), - &vec, - &transforms, - scan_files, - scan_data_callback, - ) + scan_metadata.visit_scan_files(scan_files, scan_metadata_callback) }) // Iterator>> to Iterator> .flatten_ok(); @@ -525,7 +562,7 @@ impl Scan { // partition columns, but the read schema we use here does _NOT_ include partition // columns. So we cannot safely assume that all column references are valid. See // https://github.com/delta-io/delta-kernel-rs/issues/434 for more details. - let read_result_iter = engine.get_parquet_handler().read_parquet_files( + let read_result_iter = engine.parquet_handler().read_parquet_files( &[meta], global_state.physical_schema.clone(), physical_predicate.clone(), @@ -567,7 +604,7 @@ impl Scan { } } -/// Get the schema that scan rows (from [`Scan::scan_data`]) will be returned with. +/// Get the schema that scan rows (from [`Scan::scan_metadata`]) will be returned with. /// /// It is: /// ```ignored @@ -655,19 +692,19 @@ pub fn selection_vector( descriptor: &DeletionVectorDescriptor, table_root: &Url, ) -> DeltaResult> { - let fs_client = engine.get_file_system_client(); - let dv_treemap = descriptor.read(fs_client, table_root)?; + let storage = engine.storage_handler(); + let dv_treemap = descriptor.read(storage, table_root)?; Ok(deletion_treemap_to_bools(dv_treemap)) } // some utils that are used in file_stream.rs and state.rs tests #[cfg(test)] pub(crate) mod test_utils { + use crate::arrow::array::StringArray; + use crate::utils::test_utils::string_array_to_engine_data; + use itertools::Itertools; use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; - use crate::{ actions::get_log_schema, engine::{ @@ -676,37 +713,59 @@ pub(crate) mod test_utils { }, scan::log_replay::scan_action_iter, schema::SchemaRef, - EngineData, JsonHandler, + JsonHandler, }; use super::{state::ScanCallback, Transform}; - // TODO(nick): Merge all copies of this into one "test utils" thing - fn string_array_to_engine_data(string_array: StringArray) -> Box { - let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); - let schema = Arc::new(ArrowSchema::new(vec![string_field])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) - .expect("Can't convert to record batch"); - Box::new(ArrowEngineData::new(batch)) + // Generates a batch of sidecar actions with the given paths. + // The schema is provided as null columns affect equality checks. 
+ pub(crate) fn sidecar_batch_with_given_paths( + paths: Vec<&str>, + output_schema: SchemaRef, + ) -> Box { + let handler = SyncJsonHandler {}; + + let mut json_strings: Vec = paths + .iter() + .map(|path| { + format!( + r#"{{"sidecar":{{"path":"{path}","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{{"tag_foo":"tag_bar"}}}}}}"# + ) + }) + .collect(); + json_strings.push(r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#.to_string()); + + let json_strings_array: StringArray = + json_strings.iter().map(|s| s.as_str()).collect_vec().into(); + + let parsed = handler + .parse_json( + string_array_to_engine_data(json_strings_array), + output_schema, + ) + .unwrap(); + + ArrowEngineData::try_from_engine_data(parsed).unwrap() } - // simple add - pub(crate) fn add_batch_simple() -> Box { + // Generates a batch with an add action. + // The schema is provided as null columns affect equality checks. + pub(crate) fn add_batch_simple(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues": {"date": "2017-12-10"},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); ArrowEngineData::try_from_engine_data(parsed).unwrap() } - // add batch with a removed file - pub(crate) fn add_batch_with_remove() -> Box { + // An add batch with a removed file parsed with the schema provided + pub(crate) fn add_batch_with_remove(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"remove":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c001.snappy.parquet","deletionTimestamp":1677811194426,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":635,"tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, @@ -715,7 +774,6 @@ pub(crate) mod test_utils { 
r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); @@ -761,16 +819,11 @@ pub(crate) mod test_utils { ); let mut batch_count = 0; for res in iter { - let (batch, sel, transforms) = res.unwrap(); - assert_eq!(sel, expected_sel_vec); - crate::scan::state::visit_scan_files( - batch.as_ref(), - &sel, - &transforms, - context.clone(), - validate_callback, - ) - .unwrap(); + let scan_metadata = res.unwrap(); + assert_eq!(scan_metadata.scan_files.selection_vector, expected_sel_vec); + scan_metadata + .visit_scan_files(context.clone(), validate_callback) + .unwrap(); batch_count += 1; } assert_eq!(batch_count, 1); @@ -959,8 +1012,8 @@ mod tests { } fn get_files_for_scan(scan: Scan, engine: &dyn Engine) -> DeltaResult> { - let scan_data = scan.scan_data(engine)?; - fn scan_data_callback( + let scan_metadata_iter = scan.scan_metadata(engine)?; + fn scan_metadata_callback( paths: &mut Vec, path: &str, _size: i64, @@ -973,21 +1026,15 @@ mod tests { assert!(dv_info.deletion_vector.is_none()); } let mut files = vec![]; - for data in scan_data { - let (data, vec, transforms) = data?; - files = state::visit_scan_files( - data.as_ref(), - &vec, - &transforms, - files, - scan_data_callback, - )?; + for res in scan_metadata_iter { + let scan_metadata = res?; + files = scan_metadata.visit_scan_files(files, scan_metadata_callback)?; } Ok(files) } #[test] - fn test_scan_data_paths() { + fn test_scan_metadata_paths() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/table-without-dv-small/")).unwrap(); let url = url::Url::from_directory_path(path).unwrap(); @@ -1005,7 +1052,7 @@ mod tests { } #[test_log::test] - fn test_scan_data() { + fn test_scan_metadata() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/table-without-dv-small/")).unwrap(); let url = url::Url::from_directory_path(path).unwrap(); @@ -1066,7 +1113,7 @@ mod tests { } #[test] - fn test_replay_for_scan_data() { + fn test_replay_for_scan_metadata() { let path = std::fs::canonicalize(PathBuf::from("./tests/data/parquet_row_group_skipping/")); let url = url::Url::from_directory_path(path.unwrap()).unwrap(); let engine = SyncEngine::new(); @@ -1075,7 +1122,7 @@ mod tests { let snapshot = table.snapshot(&engine, None).unwrap(); let scan = snapshot.into_scan_builder().build().unwrap(); let data: Vec<_> = scan - .replay_for_scan_data(&engine) + .replay_for_scan_metadata(&engine) .unwrap() .try_collect() .unwrap(); diff --git a/kernel/src/scan/state.rs b/kernel/src/scan/state.rs index 85eb6e4a7..b04518026 100644 --- a/kernel/src/scan/state.rs +++ b/kernel/src/scan/state.rs @@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize}; use tracing::warn; use super::log_replay::SCAN_ROW_SCHEMA; +use super::ScanMetadata; /// State that doesn't change between scans #[derive(Clone, Debug, Serialize, Deserialize)] @@ -68,8 +69,8 @@ impl DvInfo { self.deletion_vector .as_ref() .map(|dv_descriptor| { - let fs_client = engine.get_file_system_client(); - dv_descriptor.read(fs_client, table_root) + let storage = engine.storage_handler(); + dv_descriptor.read(storage, table_root) }) .transpose() } @@ 
-92,8 +93,8 @@ impl DvInfo { self.deletion_vector .as_ref() .map(|dv| { - let fs_client = engine.get_file_system_client(); - dv.row_indexes(fs_client, table_root) + let storage = engine.storage_handler(); + dv.row_indexes(storage, table_root) }) .transpose() } @@ -110,8 +111,8 @@ pub fn transform_to_logical( ) -> DeltaResult> { match transform { Some(ref transform) => engine - .get_expression_handler() - .get_evaluator( + .evaluation_handler() + .new_expression_evaluator( physical_schema.clone(), transform.as_ref().clone(), // TODO: Maybe eval should take a ref logical_schema.clone().into(), @@ -135,12 +136,13 @@ pub type ScanCallback = fn( /// scan. /// /// The arguments to the callback are: -/// * `context`: an `&mut context` argument. this can be anything that engine needs to pass through to each call +/// * `context`: an `&mut context` argument. this can be anything that engine needs to pass through +/// to each call /// * `path`: a `&str` which is the path to the file /// * `size`: an `i64` which is the size of the file /// * `dv_info`: a [`DvInfo`] struct, which allows getting the selection vector for this file -/// * `transform`: An optional expression that, if present, _must_ be applied to physical data to convert it to -/// the correct logical format +/// * `transform`: An optional expression that, if present, _must_ be applied to physical data to +/// convert it to the correct logical format /// * `partition_values`: a `HashMap` which are partition values /// /// ## Context @@ -151,33 +153,26 @@ pub type ScanCallback = fn( /// ## Example /// ```ignore /// let mut context = [my context]; -/// for res in scan_data { // scan data from scan.scan_data() -/// let (data, vector) = res?; -/// context = delta_kernel::scan::state::visit_scan_files( -/// data.as_ref(), -/// selection_vector, +/// for res in scan_metadata_iter { // scan metadata iterator from scan.scan_metadata() +/// let scan_metadata = res?; +/// context = scan_metadata.visit_scan_files( /// context, /// my_callback, /// )?; /// } /// ``` -pub fn visit_scan_files( - data: &dyn EngineData, - selection_vector: &[bool], - transforms: &[Option], - context: T, - callback: ScanCallback, -) -> DeltaResult { - let mut visitor = ScanFileVisitor { - callback, - selection_vector, - transforms, - context, - }; - visitor.visit_rows_of(data)?; - Ok(visitor.context) +impl ScanMetadata { + pub fn visit_scan_files(&self, context: T, callback: ScanCallback) -> DeltaResult { + let mut visitor = ScanFileVisitor { + callback, + selection_vector: &self.scan_files.selection_vector, + transforms: &self.scan_file_transforms, + context, + }; + visitor.visit_rows_of(self.scan_files.data.as_ref())?; + Ok(visitor.context) + } } - // add some visitor magic for engines struct ScanFileVisitor<'a, T> { callback: ScanCallback, @@ -243,6 +238,7 @@ impl RowVisitor for ScanFileVisitor<'_, T> { mod tests { use std::collections::HashMap; + use crate::actions::get_log_schema; use crate::scan::test_utils::{add_batch_simple, run_with_validate_callback}; use crate::ExpressionRef; @@ -279,10 +275,10 @@ mod tests { } #[test] - fn test_simple_visit_scan_data() { + fn test_simple_visit_scan_metadata() { let context = TestContext { id: 2 }; run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], diff --git a/kernel/src/schema/mod.rs b/kernel/src/schema/mod.rs index 6086a7031..3a5648b57 100644 --- a/kernel/src/schema/mod.rs +++ 
b/kernel/src/schema/mod.rs @@ -22,7 +22,7 @@ pub type SchemaRef = Arc; #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] #[serde(untagged)] pub enum MetadataValue { - Number(i32), + Number(i64), String(String), Boolean(bool), // The [PROTOCOL](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#struct-field) states @@ -32,8 +32,8 @@ pub enum MetadataValue { Other(serde_json::Value), } -impl std::fmt::Display for MetadataValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl Display for MetadataValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { MetadataValue::Number(n) => write!(f, "{n}"), MetadataValue::String(s) => write!(f, "{s}"), @@ -61,8 +61,8 @@ impl From<&str> for MetadataValue { } } -impl From for MetadataValue { - fn from(value: i32) -> Self { +impl From for MetadataValue { + fn from(value: i64) -> Self { Self::Number(value) } } @@ -226,6 +226,11 @@ impl StructField { .unwrap() .into_owned() } + + fn has_invariants(&self) -> bool { + self.metadata + .contains_key(ColumnMetadataKey::Invariants.as_ref()) + } } /// A struct is used to represent both the top-level schema of the table @@ -286,6 +291,16 @@ impl StructType { self.fields.values() } + pub(crate) fn fields_len(&self) -> usize { + // O(1) for indexmap + self.fields.len() + } + + // Checks if the `StructType` contains a field with the specified name. + pub(crate) fn contains(&self, name: impl AsRef) -> bool { + self.fields.contains_key(name.as_ref()) + } + /// Extracts the name and type of all leaf columns, in schema order. Caller should pass Some /// `own_name` if this schema is embedded in a larger struct (e.g. `add.*`) and None if the /// schema is a top-level result (e.g. `*`). @@ -300,6 +315,34 @@ impl StructType { } } +#[derive(Debug, Default)] +pub(crate) struct InvariantChecker { + has_invariants: bool, +} + +impl<'a> SchemaTransform<'a> for InvariantChecker { + fn transform_struct_field(&mut self, field: &'a StructField) -> Option> { + if field.has_invariants() { + self.has_invariants = true; + } else if !self.has_invariants { + let _ = self.recurse_into_struct_field(field); + } + Some(Cow::Borrowed(field)) + } +} + +impl InvariantChecker { + /// Checks if any column in the schema (including nested columns) has invariants defined. + /// + /// This traverses the entire schema to check for the presence of the "delta.invariants" + /// metadata key. 
+ pub(crate) fn has_invariants(schema: &Schema) -> bool { + let mut checker = InvariantChecker::default(); + let _ = checker.transform_struct(schema); + checker.has_invariants + } +} + /// Helper for RowVisitor implementations #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[derive(Clone, Default)] @@ -1034,16 +1077,22 @@ mod tests { "nullable": true, "metadata": { "delta.columnMapping.id": 4, - "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" + "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1", + "delta.identity.start": 2147483648 } } "#; + let field: StructField = serde_json::from_str(data).unwrap(); let col_id = field .get_config_value(&ColumnMetadataKey::ColumnMappingId) .unwrap(); + let id_start = field + .get_config_value(&ColumnMetadataKey::IdentityStart) + .unwrap(); assert!(matches!(col_id, MetadataValue::Number(num) if *num == 4)); + assert!(matches!(id_start, MetadataValue::Number(num) if *num == 2147483648i64)); assert_eq!( field.physical_name(), "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" @@ -1203,4 +1252,111 @@ mod tests { "[\"an\",\"array\"]" ); } + + #[test] + fn test_fields_len() { + let schema = StructType::new([]); + assert!(schema.fields_len() == 0); + let schema = StructType::new([ + StructField::nullable("a", DataType::LONG), + StructField::nullable("b", DataType::LONG), + StructField::nullable("c", DataType::LONG), + StructField::nullable("d", DataType::LONG), + ]); + assert_eq!(schema.fields_len(), 4); + let schema = StructType::new([ + StructField::nullable("b", DataType::LONG), + StructField::not_null("b", DataType::LONG), + StructField::nullable("c", DataType::LONG), + StructField::nullable("c", DataType::LONG), + ]); + assert_eq!(schema.fields_len(), 2); + } + + #[test] + fn test_has_invariants() { + // Schema with no invariants + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + ]); + assert!(!InvariantChecker::has_invariants(&schema)); + + // Schema with top-level invariant + let mut field = StructField::nullable("c", DataType::STRING); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("c > 0".to_string()), + ); + + let schema = StructType::new([StructField::nullable("a", DataType::STRING), field]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a struct + let nested_field = StructField::nullable( + "nested_c", + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + nested_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in an array of structs + let array_field = StructField::nullable( + "array_field", + ArrayType::new( + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + array_field, + ]); + 
assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a map value that's a struct + let map_field = StructField::nullable( + "map_field", + MapType::new( + DataType::STRING, + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + map_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + } } diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index f198b9080..8b0bc86fd 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -7,13 +7,13 @@ use tracing::{debug, warn}; use url::Url; use crate::actions::{Metadata, Protocol}; -use crate::log_segment::LogSegment; +use crate::log_segment::{self, LogSegment}; use crate::scan::ScanBuilder; -use crate::schema::Schema; +use crate::schema::{Schema, SchemaRef}; use crate::table_configuration::TableConfiguration; use crate::table_features::ColumnMappingMode; use crate::table_properties::TableProperties; -use crate::{DeltaResult, Engine, Error, FileSystemClient, Version}; +use crate::{DeltaResult, Engine, Error, StorageHandler, Version}; const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; // TODO expose methods for accessing the files of a table (with file pruning). @@ -21,6 +21,7 @@ const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; /// throughout time, `Snapshot`s represent a view of a table at a specific point in time; they /// have a defined schema (which may change over time for any given table), specific version, and /// frozen log segment. +#[derive(PartialEq, Eq)] pub struct Snapshot { log_segment: LogSegment, table_configuration: TableConfiguration, @@ -43,30 +44,188 @@ impl std::fmt::Debug for Snapshot { } impl Snapshot { + fn new(log_segment: LogSegment, table_configuration: TableConfiguration) -> Self { + Self { + log_segment, + table_configuration, + } + } + /// Create a new [`Snapshot`] instance for the given version. /// /// # Parameters /// /// - `table_root`: url pointing at the table root (where `_delta_log` folder is located) /// - `engine`: Implementation of [`Engine`] apis. - /// - `version`: target version of the [`Snapshot`] + /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest + /// version of the table. pub fn try_new( table_root: Url, engine: &dyn Engine, version: Option, ) -> DeltaResult { - let fs_client = engine.get_file_system_client(); + let storage = engine.storage_handler(); let log_root = table_root.join("_delta_log/")?; - let checkpoint_hint = read_last_checkpoint(fs_client.as_ref(), &log_root)?; + let checkpoint_hint = read_last_checkpoint(storage.as_ref(), &log_root)?; let log_segment = - LogSegment::for_snapshot(fs_client.as_ref(), log_root, checkpoint_hint, version)?; + LogSegment::for_snapshot(storage.as_ref(), log_root, checkpoint_hint, version)?; // try_new_from_log_segment will ensure the protocol is supported Self::try_new_from_log_segment(table_root, log_segment, engine) } + /// Create a new [`Snapshot`] instance from an existing [`Snapshot`]. This is useful when you + /// already have a [`Snapshot`] lying around and want to do the minimal work to 'update' the + /// snapshot to a later version. + /// + /// We implement a simple heuristic: + /// 1. 
if the new version == existing version, just return the existing snapshot + /// 2. if the new version < existing version, error: there is no optimization to do here + /// 3. list from (existing checkpoint version + 1) onward (or just existing snapshot version if + /// no checkpoint) + /// 4. a. if new checkpoint is found: just create a new snapshot from that checkpoint (and + /// commits after it) + /// b. if no new checkpoint is found: do lightweight P+M replay on the latest commits (after + /// ensuring we only retain commits > any checkpoints) + /// + /// # Parameters + /// + /// - `existing_snapshot`: reference to an existing [`Snapshot`] + /// - `engine`: Implementation of [`Engine`] apis. + /// - `version`: target version of the [`Snapshot`]. None will create a snapshot at the latest + /// version of the table. + pub fn try_new_from( + existing_snapshot: Arc, + engine: &dyn Engine, + version: impl Into>, + ) -> DeltaResult> { + let old_log_segment = &existing_snapshot.log_segment; + let old_version = existing_snapshot.version(); + let new_version = version.into(); + if let Some(new_version) = new_version { + if new_version == old_version { + // Re-requesting the same version + return Ok(existing_snapshot.clone()); + } + if new_version < old_version { + // Hint is too new: error since this is effectively an incorrect optimization + return Err(Error::Generic(format!( + "Requested snapshot version {} is older than snapshot hint version {}", + new_version, old_version + ))); + } + } + + let log_root = old_log_segment.log_root.clone(); + let storage = engine.storage_handler(); + + // Start listing just after the previous segment's checkpoint, if any + let listing_start = old_log_segment.checkpoint_version.unwrap_or(0) + 1; + + // Check for new commits + let (new_ascending_commit_files, checkpoint_parts) = + log_segment::list_log_files_with_version( + storage.as_ref(), + &log_root, + Some(listing_start), + new_version, + )?; + + // NB: we need to check both checkpoints and commits since we filter commits at and below + // the checkpoint version. Example: if we have a checkpoint + commit at version 1, the log + // listing above will only return the checkpoint and not the commit. 
+ if new_ascending_commit_files.is_empty() && checkpoint_parts.is_empty() { + match new_version { + Some(new_version) if new_version != old_version => { + // No new commits, but we are looking for a new version + return Err(Error::Generic(format!( + "Requested snapshot version {} is newer than the latest version {}", + new_version, old_version + ))); + } + _ => { + // No new commits, just return the same snapshot + return Ok(existing_snapshot.clone()); + } + } + } + + // create a log segment just from existing_checkpoint.version -> new_version + // OR could be from 1 -> new_version + let mut new_log_segment = LogSegment::try_new( + new_ascending_commit_files, + checkpoint_parts, + log_root.clone(), + new_version, + )?; + + let new_end_version = new_log_segment.end_version; + if new_end_version < old_version { + // we should never see a new log segment with a version < the existing snapshot + // version, that would mean a commit was incorrectly deleted from the log + return Err(Error::Generic(format!( + "Unexpected state: The newest version in the log {} is older than the old version {}", + new_end_version, old_version))); + } + if new_end_version == old_version { + // No new commits, just return the same snapshot + return Ok(existing_snapshot.clone()); + } + + if new_log_segment.checkpoint_version.is_some() { + // we have a checkpoint in the new LogSegment, just construct a new snapshot from that + let snapshot = Self::try_new_from_log_segment( + existing_snapshot.table_root().clone(), + new_log_segment, + engine, + ); + return Ok(Arc::new(snapshot?)); + } + + // after this point, we incrementally update the snapshot with the new log segment. + // first we remove the 'overlap' in commits, example: + // + // old logsegment checkpoint1-commit1-commit2-commit3 + // 1. new logsegment commit1-commit2-commit3 + // 2. new logsegment commit1-commit2-commit3-commit4 + // 3. new logsegment checkpoint2+commit2-commit3-commit4 + // + // retain does + // 1. new logsegment [empty] -> caught above + // 2. new logsegment [commit4] + // 3. new logsegment [checkpoint2-commit3] -> caught above + new_log_segment + .ascending_commit_files + .retain(|log_path| old_version < log_path.version); + + // we have new commits and no new checkpoint: we replay new commits for P+M and then + // create a new snapshot by combining LogSegments and building a new TableConfiguration + let (new_metadata, new_protocol) = new_log_segment.protocol_and_metadata(engine)?; + let table_configuration = TableConfiguration::try_new_from( + existing_snapshot.table_configuration(), + new_metadata, + new_protocol, + new_log_segment.end_version, + )?; + // NB: we must add the new log segment to the existing snapshot's log segment + let mut ascending_commit_files = old_log_segment.ascending_commit_files.clone(); + ascending_commit_files.extend(new_log_segment.ascending_commit_files); + // we can pass in just the old checkpoint parts since by the time we reach this line, we + // know there are no checkpoints in the new log segment. + let combined_log_segment = LogSegment::try_new( + ascending_commit_files, + old_log_segment.checkpoint_parts.clone(), + log_root, + new_version, + )?; + Ok(Arc::new(Snapshot::new( + combined_log_segment, + table_configuration, + ))) + } + /// Create a new [`Snapshot`] instance. pub(crate) fn try_new_from_log_segment( location: Url, @@ -97,18 +256,21 @@ impl Snapshot { self.table_configuration().version() } - /// Table [`Schema`] at this `Snapshot`s version. 
- pub fn schema(&self) -> &Schema { + /// Table [`type@Schema`] at this `Snapshot`s version. + pub fn schema(&self) -> SchemaRef { self.table_configuration.schema() } /// Table [`Metadata`] at this `Snapshot`s version. - pub fn metadata(&self) -> &Metadata { + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn metadata(&self) -> &Metadata { self.table_configuration.metadata() } /// Table [`Protocol`] at this `Snapshot`s version. - pub fn protocol(&self) -> &Protocol { + #[allow(dead_code)] + #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + pub(crate) fn protocol(&self) -> &Protocol { self.table_configuration.protocol() } @@ -139,11 +301,12 @@ impl Snapshot { } } +// Note: Schema can not be derived because the checkpoint schema is only known at runtime. #[derive(Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] -struct CheckpointMetadata { +struct LastCheckpointHint { /// The version of the table when the last checkpoint was made. #[allow(unreachable_pub)] // used by acceptance tests (TODO make an fn accessor?) pub version: Version, @@ -170,11 +333,11 @@ struct CheckpointMetadata { /// /// TODO: java kernel retries three times before failing, should we do the same? fn read_last_checkpoint( - fs_client: &dyn FileSystemClient, + storage: &dyn StorageHandler, log_root: &Url, -) -> DeltaResult> { +) -> DeltaResult> { let file_path = log_root.join(LAST_CHECKPOINT_FILE_NAME)?; - match fs_client + match storage .read_files(vec![(file_path, None)]) .and_then(|mut data| data.next().expect("read_files should return one file")) { @@ -197,12 +360,20 @@ mod tests { use object_store::memory::InMemory; use object_store::path::Path; use object_store::ObjectStore; + use serde_json::json; + + use crate::arrow::array::StringArray; + use crate::arrow::record_batch::RecordBatch; + use crate::parquet::arrow::ArrowWriter; + use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::executor::tokio::TokioBackgroundExecutor; - use crate::engine::default::filesystem::ObjectStoreFileSystemClient; + use crate::engine::default::filesystem::ObjectStoreStorageHandler; + use crate::engine::default::DefaultEngine; use crate::engine::sync::SyncEngine; use crate::path::ParsedLogPath; - use crate::schema::StructType; + use crate::utils::test_utils::string_array_to_engine_data; + use test_utils::{add_commit, delta_path_for_version}; #[test] fn test_snapshot_read_metadata() { @@ -218,8 +389,8 @@ mod tests { assert_eq!(snapshot.protocol(), &expected); let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#; - let expected: StructType = serde_json::from_str(schema_string).unwrap(); - assert_eq!(snapshot.schema(), &expected); + let expected: SchemaRef = serde_json::from_str(schema_string).unwrap(); + assert_eq!(snapshot.schema(), expected); } #[test] @@ -236,8 +407,213 @@ mod tests { assert_eq!(snapshot.protocol(), &expected); let schema_string = r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#; - let expected: StructType = serde_json::from_str(schema_string).unwrap(); - assert_eq!(snapshot.schema(), &expected); + let expected: SchemaRef = serde_json::from_str(schema_string).unwrap(); + assert_eq!(snapshot.schema(), expected); + } + + // interesting cases for testing Snapshot::new_from: 
+    // 1. new version < existing version
+    // 2. new version == existing version
+    // 3. new version > existing version AND
+    //    a. log segment hasn't changed
+    //    b. log segment for old..=new version has a checkpoint (with new protocol/metadata)
+    //    c. log segment for old..=new version has no checkpoint
+    //       i. commits have (new protocol, new metadata)
+    //       ii. commits have (new protocol, no metadata)
+    //       iii. commits have (no protocol, new metadata)
+    //       iv. commits have (no protocol, no metadata)
+    #[tokio::test]
+    async fn test_snapshot_new_from() -> DeltaResult<()> {
+        let path =
+            std::fs::canonicalize(PathBuf::from("./tests/data/table-with-dv-small/")).unwrap();
+        let url = url::Url::from_directory_path(path).unwrap();
+
+        let engine = SyncEngine::new();
+        let old_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(1)).unwrap());
+        // 1. new version < existing version: error
+        let snapshot_res = Snapshot::try_new_from(old_snapshot.clone(), &engine, Some(0));
+        assert!(matches!(
+            snapshot_res,
+            Err(Error::Generic(msg)) if msg == "Requested snapshot version 0 is older than snapshot hint version 1"
+        ));
+
+        // 2. new version == existing version
+        let snapshot = Snapshot::try_new_from(old_snapshot.clone(), &engine, Some(1)).unwrap();
+        let expected = old_snapshot.clone();
+        assert_eq!(snapshot, expected);
+
+        // tests Snapshot::new_from by:
+        // 1. creating a snapshot with the new API for commits 0..=1 (based on old snapshot at 0)
+        // 2. comparing with a snapshot created directly at version 1
+        //
+        // the commits tested are:
+        // - commit 0 -> base snapshot at this version
+        // - commit 1 -> final snapshot at this version
+        //
+        // in each test we will modify version 1 to test different scenarios
+        fn test_new_from(store: Arc<InMemory>) -> DeltaResult<()> {
+            let url = Url::parse("memory:///")?;
+            let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new()));
+            let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+            let snapshot = Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(1))?;
+            let expected = Snapshot::try_new(url.clone(), &engine, Some(1))?;
+            assert_eq!(snapshot, expected.into());
+            Ok(())
+        }
+
+        // TODO: unify this and lots of stuff in LogSegment tests and test_utils
+        async fn commit(store: &InMemory, version: Version, commit: Vec<serde_json::Value>) {
+            let commit_data = commit
+                .iter()
+                .map(ToString::to_string)
+                .collect::<Vec<_>>()
+                .join("\n");
+            add_commit(store, version, commit_data).await.unwrap();
+        }
+
+        // for (3) we will just engineer custom log files
+        let store = Arc::new(InMemory::new());
+        // everything will have a starting 0 commit with commitInfo, protocol, metadata
+        let commit0 = vec![
+            json!({
+                "commitInfo": {
+                    "timestamp": 1587968586154i64,
+                    "operation": "WRITE",
+                    "operationParameters": {"mode":"ErrorIfExists","partitionBy":"[]"},
+                    "isBlindAppend":true
+                }
+            }),
+            json!({
+                "protocol": {
+                    "minReaderVersion": 1,
+                    "minWriterVersion": 2
+                }
+            }),
+            json!({
+                "metaData": {
+                    "id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9",
+                    "format": {
+                        "provider": "parquet",
+                        "options": {}
+                    },
+                    "schemaString": "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}",
+                    "partitionColumns": [],
+                    "configuration": {},
+                    "createdTime": 1587968585495i64
+                }
+            }),
+        ];
+        commit(store.as_ref(), 0, commit0.clone()).await;
+        // 3. new version > existing version
+        // a. no new log segment
+        let url = Url::parse("memory:///")?;
+        let engine = DefaultEngine::new(
+            Arc::new(store.fork()),
+            Arc::new(TokioBackgroundExecutor::new()),
+        );
+        let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+        let snapshot = Snapshot::try_new_from(base_snapshot.clone(), &engine, None)?;
+        let expected = Snapshot::try_new(url.clone(), &engine, Some(0))?;
+        assert_eq!(snapshot, expected.into());
+        // version exceeds latest version of the table = err
+        assert!(matches!(
+            Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(1)),
+            Err(Error::Generic(msg)) if msg == "Requested snapshot version 1 is newer than the latest version 0"
+        ));
+
+        // b. log segment for old..=new version has a checkpoint (with new protocol/metadata)
+        let store_3a = store.fork();
+        let mut checkpoint1 = commit0.clone();
+        commit(&store_3a, 1, commit0.clone()).await;
+        checkpoint1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        checkpoint1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+
+        let handler = engine.json_handler();
+        let json_strings: StringArray = checkpoint1
+            .into_iter()
+            .map(|json| json.to_string())
+            .collect::<Vec<_>>()
+            .into();
+        let parsed = handler
+            .parse_json(
+                string_array_to_engine_data(json_strings),
+                crate::actions::get_log_schema().clone(),
+            )
+            .unwrap();
+        let checkpoint = ArrowEngineData::try_from_engine_data(parsed).unwrap();
+        let checkpoint: RecordBatch = checkpoint.into();
+
+        // Write the record batch to a Parquet file
+        let mut buffer = vec![];
+        let mut writer = ArrowWriter::try_new(&mut buffer, checkpoint.schema(), None)?;
+        writer.write(&checkpoint)?;
+        writer.close()?;
+
+        store_3a
+            .put(
+                &delta_path_for_version(1, "checkpoint.parquet"),
+                buffer.into(),
+            )
+            .await
+            .unwrap();
+        test_new_from(store_3a.into())?;
+
+        // c. log segment for old..=new version has no checkpoint
+        // i. commits have (new protocol, new metadata)
+        let store_3c_i = Arc::new(store.fork());
+        let mut commit1 = commit0.clone();
+        commit1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        commit1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+        commit(store_3c_i.as_ref(), 1, commit1).await;
+        test_new_from(store_3c_i.clone())?;
+
+        // new commits AND request version > end of log
+        let url = Url::parse("memory:///")?;
+        let engine = DefaultEngine::new(store_3c_i, Arc::new(TokioBackgroundExecutor::new()));
+        let base_snapshot = Arc::new(Snapshot::try_new(url.clone(), &engine, Some(0))?);
+        assert!(matches!(
+            Snapshot::try_new_from(base_snapshot.clone(), &engine, Some(2)),
+            Err(Error::Generic(msg)) if msg == "LogSegment end version 1 not the same as the specified end version 2"
+        ));
+
+        // ii. commits have (new protocol, no metadata)
+        let store_3c_ii = store.fork();
+        let mut commit1 = commit0.clone();
+        commit1[1] = json!({
+            "protocol": {
+                "minReaderVersion": 2,
+                "minWriterVersion": 5
+            }
+        });
+        commit1.remove(2); // remove metadata
+        commit(&store_3c_ii, 1, commit1).await;
+        test_new_from(store_3c_ii.into())?;
+
+        // iii. commits have (no protocol, new metadata)
+        let store_3c_iii = store.fork();
+        let mut commit1 = commit0.clone();
+        commit1[2]["partitionColumns"] = serde_json::to_value(["some_partition_column"])?;
+        commit1.remove(1); // remove protocol
+        commit(&store_3c_iii, 1, commit1).await;
+        test_new_from(store_3c_iii.into())?;
+
+        // iv.
commits have (no protocol, no metadata) + let store_3c_iv = store.fork(); + let commit1 = vec![commit0[0].clone()]; + commit(&store_3c_iv, 1, commit1).await; + test_new_from(store_3c_iv.into())?; + + Ok(()) } #[test] @@ -249,14 +625,12 @@ mod tests { let url = url::Url::from_directory_path(path).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); - let cp = read_last_checkpoint(&client, &url).unwrap(); + let cp = read_last_checkpoint(&storage, &url).unwrap(); assert!(cp.is_none()) } @@ -288,16 +662,15 @@ mod tests { .expect("put _last_checkpoint"); }); - let client = ObjectStoreFileSystemClient::new( + let storage = ObjectStoreStorageHandler::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); let url = Url::parse("memory:///valid/").expect("valid url"); - let valid = read_last_checkpoint(&client, &url).expect("read last checkpoint"); + let valid = read_last_checkpoint(&storage, &url).expect("read last checkpoint"); let url = Url::parse("memory:///invalid/").expect("valid url"); - let invalid = read_last_checkpoint(&client, &url).expect("read last checkpoint"); + let invalid = read_last_checkpoint(&storage, &url).expect("read last checkpoint"); assert!(valid.is_some()); assert!(invalid.is_none()) } diff --git a/kernel/src/table_changes/log_replay.rs b/kernel/src/table_changes/log_replay.rs index 89951a39b..20fc11c6e 100644 --- a/kernel/src/table_changes/log_replay.rs +++ b/kernel/src/table_changes/log_replay.rs @@ -27,21 +27,21 @@ use itertools::Itertools; #[cfg(test)] mod tests; -/// Scan data for a Change Data Feed query. This holds metadata that is needed to read data rows. -pub(crate) struct TableChangesScanData { +/// Scan metadata for a Change Data Feed query. This holds metadata that's needed to read data rows. +pub(crate) struct TableChangesScanMetadata { /// Engine data with the schema defined in [`scan_row_schema`] /// /// Note: The schema of the engine data will be updated in the future to include columns /// used by Change Data Feed. - pub(crate) scan_data: Box, - /// The selection vector used to filter the `scan_data`. + pub(crate) scan_metadata: Box, + /// The selection vector used to filter the `scan_metadata`. pub(crate) selection_vector: Vec, /// A map from a remove action's path to its deletion vector pub(crate) remove_dvs: Arc>, } -/// Given an iterator of [`ParsedLogPath`] returns an iterator of [`TableChangesScanData`]. -/// Each row that is selected in the returned `TableChangesScanData.scan_data` (according +/// Given an iterator of [`ParsedLogPath`] returns an iterator of [`TableChangesScanMetadata`]. +/// Each row that is selected in the returned `TableChangesScanMetadata.scan_metadata` (according /// to the `selection_vector` field) _must_ be processed to complete the scan. Non-selected /// rows _must_ be ignored. 
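// A minimal sketch (not part of this diff) of the selection-vector contract described above:
// only selected rows are processed, non-selected rows are ignored. Field names follow
// `TableChangesScanMetadata`; `handle_selected_row` is a hypothetical callback.
fn process_scan_metadata(
    metadata: &TableChangesScanMetadata,
    handle_selected_row: &mut dyn FnMut(usize),
) {
    for (row_idx, selected) in metadata.selection_vector.iter().enumerate() {
        if *selected {
            // Selected rows _must_ be processed to complete the scan.
            handle_selected_row(row_idx);
        }
        // Non-selected rows _must_ be ignored.
    }
}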
/// @@ -52,7 +52,7 @@ pub(crate) fn table_changes_action_iter( commit_files: impl IntoIterator, table_schema: SchemaRef, physical_predicate: Option<(ExpressionRef, SchemaRef)>, -) -> DeltaResult>> { +) -> DeltaResult>> { let filter = DataSkippingFilter::new(engine.as_ref(), physical_predicate).map(Arc::new); let result = commit_files .into_iter() @@ -65,8 +65,9 @@ pub(crate) fn table_changes_action_iter( Ok(result) } -/// Processes a single commit file from the log to generate an iterator of [`TableChangesScanData`]. -/// The scanner operates in two phases that _must_ be performed in the following order: +/// Processes a single commit file from the log to generate an iterator of +/// [`TableChangesScanMetadata`]. The scanner operates in two phases that _must_ be performed in the +/// following order: /// 1. Prepare phase [`LogReplayScanner::try_new`]: This iterates over every action in the commit. /// In this phase, we do the following: /// - Determine if there exist any `cdc` actions. We determine this in the first phase because @@ -100,7 +101,7 @@ pub(crate) fn table_changes_action_iter( /// See https://github.com/delta-io/delta-kernel-rs/issues/559 /// /// 2. Scan file generation phase [`LogReplayScanner::into_scan_batches`]: This iterates over every -/// action in the commit, and generates [`TableChangesScanData`]. It does so by transforming the +/// action in the commit, and generates [`TableChangesScanMetadata`]. It does so by transforming the /// actions using [`add_transform_expr`], and generating selection vectors with the following rules: /// - If a `cdc` action was found in the prepare phase, only `cdc` actions are selected /// - Otherwise, select `add` and `remove` actions. Note that only `remove` actions that do not @@ -125,7 +126,7 @@ struct LogReplayScanner { // generated by in-commit timestamps, that timestamp will be used instead. // // Note: This will be used once an expression is introduced to transform the engine data in - // [`TableChangesScanData`] + // [`TableChangesScanMetadata`] timestamp: i64, } @@ -154,7 +155,7 @@ impl LogReplayScanner { // As a result, we would read the file path for the remove action, which is unnecessary because // all of the rows will be filtered by the predicate. Instead, we wait until deletion // vectors are resolved so that we can skip both actions in the pair. - let action_iter = engine.get_json_handler().read_json_files( + let action_iter = engine.json_handler().read_json_files( &[commit_file.location.clone()], visitor_schema, None, // not safe to apply data skipping yet @@ -208,14 +209,14 @@ impl LogReplayScanner { remove_dvs, }) } - /// Generates an iterator of [`TableChangesScanData`] by iterating over each action of the + /// Generates an iterator of [`TableChangesScanMetadata`] by iterating over each action of the /// commit, generating a selection vector, and transforming the engine data. This performs /// phase 2 of [`LogReplayScanner`]. 
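// A short sketch (not part of this diff) of how the two phases described above compose for a
// single commit file, mirroring what `table_changes_action_iter` does per commit. The exact
// argument shape of `try_new` is an assumption; the types follow the surrounding signatures.
fn replay_one_commit(
    engine: Arc<dyn Engine>,
    commit_file: ParsedLogPath,
    filter: Option<Arc<DataSkippingFilter>>,
) -> DeltaResult<impl Iterator<Item = DeltaResult<TableChangesScanMetadata>>> {
    // Phase 1: replay the commit once to detect `cdc` actions and collect remove-action
    // deletion vectors.
    let scanner = LogReplayScanner::try_new(engine.as_ref(), commit_file, &filter)?;
    // Phase 2: replay again, emitting filtered scan metadata batches.
    scanner.into_scan_batches(engine, filter)
}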
fn into_scan_batches( self, engine: Arc, filter: Option>, - ) -> DeltaResult>> { + ) -> DeltaResult>> { let Self { has_cdc_action, remove_dvs, @@ -226,16 +227,15 @@ impl LogReplayScanner { let remove_dvs = Arc::new(remove_dvs); let schema = FileActionSelectionVisitor::schema(); - let action_iter = engine.get_json_handler().read_json_files( - &[commit_file.location.clone()], - schema, - None, - )?; + let action_iter = + engine + .json_handler() + .read_json_files(&[commit_file.location.clone()], schema, None)?; let commit_version = commit_file .version .try_into() .map_err(|_| Error::generic("Failed to convert commit version to i64"))?; - let evaluator = engine.get_expression_handler().get_evaluator( + let evaluator = engine.evaluation_handler().new_expression_evaluator( get_log_add_schema().clone(), cdf_scan_row_expression(timestamp, commit_version), cdf_scan_row_schema().into(), @@ -255,9 +255,9 @@ impl LogReplayScanner { let mut visitor = FileActionSelectionVisitor::new(&remove_dvs, selection_vector, has_cdc_action); visitor.visit_rows_of(actions.as_ref())?; - let scan_data = evaluator.evaluate(actions.as_ref())?; - Ok(TableChangesScanData { - scan_data, + let scan_metadata = evaluator.evaluate(actions.as_ref())?; + Ok(TableChangesScanMetadata { + scan_metadata, selection_vector: visitor.selection_vector, remove_dvs: remove_dvs.clone(), }) diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index 35c4a99f8..babdde516 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -1,5 +1,5 @@ use super::table_changes_action_iter; -use super::TableChangesScanData; +use super::TableChangesScanMetadata; use crate::actions::deletion_vector::DeletionVectorDescriptor; use crate::actions::{Add, Cdc, Metadata, Protocol, Remove}; use crate::engine::sync::SyncEngine; @@ -11,7 +11,7 @@ use crate::scan::state::DvInfo; use crate::scan::PhysicalPredicate; use crate::schema::{DataType, StructField, StructType}; use crate::table_changes::log_replay::LogReplayScanner; -use crate::table_features::ReaderFeatures; +use crate::table_features::ReaderFeature; use crate::utils::test_utils::{Action, LocalMockTable}; use crate::Expression; use crate::{DeltaResult, Engine, Error, Version}; @@ -37,7 +37,7 @@ fn get_segment( let table_root = url::Url::from_directory_path(path).unwrap(); let log_root = table_root.join("_delta_log/")?; let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), + engine.storage_handler().as_ref(), log_root, start_version, end_version, @@ -45,8 +45,8 @@ fn get_segment( Ok(log_segment.ascending_commit_files) } -fn result_to_sv(iter: impl Iterator>) -> Vec { - iter.map_ok(|scan_data| scan_data.selection_vector.into_iter()) +fn result_to_sv(iter: impl Iterator>) -> Vec { + iter.map_ok(|scan_metadata| scan_metadata.selection_vector.into_iter()) .flatten_ok() .try_collect() .unwrap() @@ -75,8 +75,8 @@ async fn metadata_protocol() { Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([ReaderFeatures::ColumnMapping]), + Some([ReaderFeature::DeletionVectors]), + Some([ReaderFeature::ColumnMapping]), ) .unwrap(), ), @@ -129,10 +129,7 @@ async fn unsupported_reader_feature() { Protocol::try_new( 3, 7, - Some([ - ReaderFeatures::DeletionVectors, - ReaderFeatures::ColumnMapping, - ]), + Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), Some([""; 0]), ) .unwrap(), @@ -297,10 +294,10 @@ async fn add_remove() { let sv = 
table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -347,10 +344,10 @@ async fn filter_data_change() { let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -393,10 +390,10 @@ async fn cdc_selection() { let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, HashMap::new().into()); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, HashMap::new().into()); + scan_metadata.selection_vector }) .collect_vec(); @@ -459,10 +456,10 @@ async fn dv() { .into(); let sv = table_changes_action_iter(engine, commits, get_schema().into(), None) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - assert_eq!(scan_data.remove_dvs, expected_remove_dvs); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + assert_eq!(scan_metadata.remove_dvs, expected_remove_dvs); + scan_metadata.selection_vector }) .collect_vec(); @@ -536,9 +533,9 @@ async fn data_skipping_filter() { let sv = table_changes_action_iter(engine, commits, logical_schema.into(), predicate) .unwrap() - .flat_map(|scan_data| { - let scan_data = scan_data.unwrap(); - scan_data.selection_vector + .flat_map(|scan_metadata| { + let scan_metadata = scan_metadata.unwrap(); + scan_metadata.selection_vector }) .collect_vec(); diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index e65b0ae53..86d0f99af 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -31,7 +31,6 @@ //! let table_change_batches = table_changes_scan.execute(engine.clone())?; //! # Ok::<(), Error>(()) //! 
``` -use std::collections::HashSet; use std::sync::{Arc, LazyLock}; use scan::TableChangesScanBuilder; @@ -42,7 +41,7 @@ use crate::log_segment::LogSegment; use crate::path::AsUrl; use crate::schema::{DataType, Schema, StructField, StructType}; use crate::snapshot::Snapshot; -use crate::table_features::{ColumnMappingMode, ReaderFeatures}; +use crate::table_features::{ColumnMappingMode, ReaderFeature}; use crate::table_properties::TableProperties; use crate::utils::require; use crate::{DeltaResult, Engine, Error, Version}; @@ -111,7 +110,7 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { pub struct TableChanges { pub(crate) log_segment: LogSegment, table_root: Url, - end_snapshot: Snapshot, + end_snapshot: Arc, start_version: Version, schema: Schema, } @@ -140,7 +139,7 @@ impl TableChanges { ) -> DeltaResult { let log_root = table_root.join("_delta_log/")?; let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), + engine.storage_handler().as_ref(), log_root, start_version, end_version, @@ -149,9 +148,12 @@ impl TableChanges { // Both snapshots ensure that reading is supported at the start and end version using // `ensure_read_supported`. Note that we must still verify that reading is // supported for every protocol action in the CDF range. - let start_snapshot = - Snapshot::try_new(table_root.as_url().clone(), engine, Some(start_version))?; - let end_snapshot = Snapshot::try_new(table_root.as_url().clone(), engine, end_version)?; + let start_snapshot = Arc::new(Snapshot::try_new( + table_root.as_url().clone(), + engine, + Some(start_version), + )?); + let end_snapshot = Snapshot::try_new_from(start_snapshot.clone(), engine, end_version)?; // Verify CDF is enabled at the beginning and end of the interval using // [`check_cdf_table_properties`] to fail early. This also ensures that column mapping is @@ -252,8 +254,8 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . /// See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors])); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); match &protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if protocol.min_reader_version() == 3 => { diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 4265e4805..b9bed794d 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -12,10 +12,10 @@ use crate::scan::{ColumnType, PhysicalPredicate, ScanResult}; use crate::schema::{SchemaRef, StructType}; use crate::{DeltaResult, Engine, ExpressionRef, FileMeta}; -use super::log_replay::{table_changes_action_iter, TableChangesScanData}; +use super::log_replay::{table_changes_action_iter, TableChangesScanMetadata}; use super::physical_to_logical::{physical_to_logical_expr, scan_file_physical_schema}; use super::resolve_dvs::{resolve_scan_file_dv, ResolvedCdfScanFile}; -use super::scan_file::scan_data_to_scan_file; +use super::scan_file::scan_metadata_to_scan_file; use super::{TableChanges, CDF_FIELDS}; /// The result of building a [`TableChanges`] scan over a table. 
This can be used to get the change @@ -177,15 +177,16 @@ impl TableChangesScanBuilder { } impl TableChangesScan { - /// Returns an iterator of [`TableChangesScanData`] necessary to read CDF. Each row + /// Returns an iterator of [`TableChangesScanMetadata`] necessary to read CDF. Each row /// represents an action in the delta log. These rows are filtered to yield only the actions - /// necessary to read CDF. Additionally, [`TableChangesScanData`] holds metadata on the - /// deletion vectors present in the commit. The engine data in each scan data is guaranteed - /// to belong to the same commit. Several [`TableChangesScanData`] may belong to the same commit. - fn scan_data( + /// necessary to read CDF. Additionally, [`TableChangesScanMetadata`] holds metadata on the + /// deletion vectors present in the commit. The engine data in each scan metadata is guaranteed + /// to belong to the same commit. Several [`TableChangesScanMetadata`] may belong to the same + /// commit. + fn scan_metadata( &self, engine: Arc, - ) -> DeltaResult>> { + ) -> DeltaResult>> { let commits = self .table_changes .log_segment @@ -197,7 +198,7 @@ impl TableChangesScan { PhysicalPredicate::Some(predicate, schema) => Some((predicate, schema)), PhysicalPredicate::None => None, }; - let schema = self.table_changes.end_snapshot.schema().clone().into(); + let schema = self.table_changes.end_snapshot.schema(); let it = table_changes_action_iter(engine, commits, schema, physical_predicate)?; Ok(Some(it).into_iter().flatten()) } @@ -238,8 +239,8 @@ impl TableChangesScan { &self, engine: Arc, ) -> DeltaResult>> { - let scan_data = self.scan_data(engine.clone())?; - let scan_files = scan_data_to_scan_file(scan_data); + let scan_metadata = self.scan_metadata(engine.clone())?; + let scan_files = scan_metadata_to_scan_file(scan_metadata); let global_scan_state = self.global_scan_state(); let table_root = self.table_changes.table_root().clone(); @@ -286,7 +287,7 @@ fn read_scan_file( physical_to_logical_expr(&scan_file, global_state.logical_schema.as_ref(), all_fields)?; let physical_schema = scan_file_physical_schema(&scan_file, global_state.physical_schema.as_ref()); - let phys_to_logical_eval = engine.get_expression_handler().get_evaluator( + let phys_to_logical_eval = engine.evaluation_handler().new_expression_evaluator( physical_schema.clone(), physical_to_logical_expr, global_state.logical_schema.clone().into(), @@ -301,7 +302,7 @@ fn read_scan_file( size: 0, location, }; - let read_result_iter = engine.get_parquet_handler().read_parquet_files( + let read_result_iter = engine.parquet_handler().read_parquet_files( &[file], physical_schema, physical_predicate, diff --git a/kernel/src/table_changes/scan_file.rs b/kernel/src/table_changes/scan_file.rs index f428e09df..0b7406856 100644 --- a/kernel/src/table_changes/scan_file.rs +++ b/kernel/src/table_changes/scan_file.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use std::collections::HashMap; use std::sync::{Arc, LazyLock}; -use super::log_replay::TableChangesScanData; +use super::log_replay::TableChangesScanMetadata; use crate::actions::visitors::visit_deletion_vector_at; use crate::engine_data::{GetData, TypedGetData}; use crate::expressions::{column_expr, Expression}; @@ -47,17 +47,17 @@ pub(crate) struct CdfScanFile { pub(crate) type CdfScanCallback = fn(context: &mut T, scan_file: CdfScanFile); -/// Transforms an iterator of [`TableChangesScanData`] into an iterator of +/// Transforms an iterator of [`TableChangesScanMetadata`] into an iterator of /// [`CdfScanFile`] by 
visiting the engine data. -pub(crate) fn scan_data_to_scan_file( - scan_data: impl Iterator>, +pub(crate) fn scan_metadata_to_scan_file( + scan_metadata: impl Iterator>, ) -> impl Iterator> { - scan_data - .map(|scan_data| -> DeltaResult<_> { - let scan_data = scan_data?; + scan_metadata + .map(|scan_metadata| -> DeltaResult<_> { + let scan_metadata = scan_metadata?; let callback: CdfScanCallback> = |context, scan_file| context.push(scan_file); - Ok(visit_cdf_scan_files(&scan_data, vec![], callback)?.into_iter()) + Ok(visit_cdf_scan_files(&scan_metadata, vec![], callback)?.into_iter()) }) // Iterator-Result-Iterator .flatten_ok() // Iterator-Result } @@ -78,7 +78,7 @@ pub(crate) fn scan_data_to_scan_file( /// ## Example /// ```ignore /// let mut context = [my context]; -/// for res in scan_data { // scan data table_changes_scan.scan_data() +/// for res in scan_metadata { // scan metadata table_changes_scan.scan_metadata() /// let (data, vector, remove_dv) = res?; /// context = delta_kernel::table_changes::scan_file::visit_cdf_scan_files( /// data.as_ref(), @@ -89,18 +89,18 @@ pub(crate) fn scan_data_to_scan_file( /// } /// ``` pub(crate) fn visit_cdf_scan_files( - scan_data: &TableChangesScanData, + scan_metadata: &TableChangesScanMetadata, context: T, callback: CdfScanCallback, ) -> DeltaResult { let mut visitor = CdfScanFileVisitor { callback, context, - selection_vector: &scan_data.selection_vector, - remove_dvs: scan_data.remove_dvs.as_ref(), + selection_vector: &scan_metadata.selection_vector, + remove_dvs: scan_metadata.remove_dvs.as_ref(), }; - visitor.visit_rows_of(scan_data.scan_data.as_ref())?; + visitor.visit_rows_of(scan_metadata.scan_metadata.as_ref())?; Ok(visitor.context) } @@ -172,7 +172,7 @@ impl RowVisitor for CdfScanFileVisitor<'_, T> { } } -/// Get the schema that scan rows (from [`TableChanges::scan_data`]) will be returned with. +/// Get the schema that scan rows (from [`TableChanges::scan_metadata`]) will be returned with. pub(crate) fn cdf_scan_row_schema() -> SchemaRef { static CDF_SCAN_ROW_SCHEMA: LazyLock> = LazyLock::new(|| { let deletion_vector = StructType::new([ @@ -213,7 +213,7 @@ pub(crate) fn cdf_scan_row_schema() -> SchemaRef { } /// Expression to convert an action with `log_schema` into one with -/// [`cdf_scan_row_schema`]. This is the expression used to create [`TableChangesScanData`]. +/// [`cdf_scan_row_schema`]. This is the expression used to create [`TableChangesScanMetadata`]. 
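// A short sketch (not part of this diff) of how this expression pairs with
// `cdf_scan_row_schema`, following the evaluator construction used in `into_scan_batches`
// above; the timestamp and commit version are placeholders supplied by the caller.
fn make_cdf_scan_row_evaluator(engine: &dyn Engine, timestamp: i64, commit_version: i64) {
    let _evaluator = engine.evaluation_handler().new_expression_evaluator(
        get_log_add_schema().clone(),                       // input: log add schema
        cdf_scan_row_expression(timestamp, commit_version), // transform per commit
        cdf_scan_row_schema().into(),                       // output: cdf scan row schema
    );
    // Calling `_evaluator.evaluate(actions)` then yields engine data in `cdf_scan_row_schema`.
}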
pub(crate) fn cdf_scan_row_expression(commit_timestamp: i64, commit_number: i64) -> Expression { Expression::struct_from([ Expression::struct_from([ @@ -242,7 +242,7 @@ mod tests { use itertools::Itertools; - use super::{scan_data_to_scan_file, CdfScanFile, CdfScanFileType}; + use super::{scan_metadata_to_scan_file, CdfScanFile, CdfScanFileType}; use crate::actions::deletion_vector::DeletionVectorDescriptor; use crate::actions::{Add, Cdc, Remove}; use crate::engine::sync::SyncEngine; @@ -326,25 +326,23 @@ mod tests { let table_root = url::Url::from_directory_path(mock_table.table_root()).unwrap(); let log_root = table_root.join("_delta_log/").unwrap(); - let log_segment = LogSegment::for_table_changes( - engine.get_file_system_client().as_ref(), - log_root, - 0, - None, - ) - .unwrap(); + let log_segment = + LogSegment::for_table_changes(engine.storage_handler().as_ref(), log_root, 0, None) + .unwrap(); let table_schema = StructType::new([ StructField::nullable("id", DataType::INTEGER), StructField::nullable("value", DataType::STRING), ]); - let scan_data = table_changes_action_iter( + let scan_metadata = table_changes_action_iter( Arc::new(engine), log_segment.ascending_commit_files.clone(), table_schema.into(), None, ) .unwrap(); - let scan_files: Vec<_> = scan_data_to_scan_file(scan_data).try_collect().unwrap(); + let scan_files: Vec<_> = scan_metadata_to_scan_file(scan_metadata) + .try_collect() + .unwrap(); // Generate the expected [`CdfScanFile`] let timestamps = log_segment diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index 565546d52..a51a38ce7 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -8,19 +8,18 @@ //! [`TableProperties`]. //! //! [`Schema`]: crate::schema::Schema -use std::collections::HashSet; use std::sync::{Arc, LazyLock}; use url::Url; use crate::actions::{ensure_supported_features, Metadata, Protocol}; -use crate::schema::{Schema, SchemaRef}; +use crate::schema::{InvariantChecker, SchemaRef}; use crate::table_features::{ - column_mapping_mode, validate_schema_column_mapping, ColumnMappingMode, ReaderFeatures, - WriterFeatures, + column_mapping_mode, validate_schema_column_mapping, ColumnMappingMode, ReaderFeature, + WriterFeature, }; use crate::table_properties::TableProperties; -use crate::{DeltaResult, Version}; +use crate::{DeltaResult, Error, Version}; /// Holds all the configuration for a table at a specific version. This includes the supported /// reader and writer features, table properties, schema, version, and table root. This can be used @@ -33,7 +32,7 @@ use crate::{DeltaResult, Version}; /// `try_new` successfully returns `TableConfiguration`, it is also guaranteed that reading the /// table is supported. 
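// A minimal usage sketch (not part of this diff), mirroring the test module further below:
// validate a (Metadata, Protocol) pair once via `try_new`, then read derived state off the
// result. The function name and the printing are illustrative only.
fn inspect_table_config(metadata: Metadata, protocol: Protocol, table_root: Url) -> DeltaResult<()> {
    // `try_new` fails if the kernel cannot read this table, so success implies read support.
    let config = TableConfiguration::try_new(metadata, protocol, table_root, 0)?;
    println!(
        "version={} column_mapping={:?} schema={:?}",
        config.version(),
        config.column_mapping_mode(),
        config.schema(),
    );
    Ok(())
}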
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct TableConfiguration { metadata: Metadata, protocol: Protocol, @@ -88,57 +87,101 @@ impl TableConfiguration { version, }) } + + pub(crate) fn try_new_from( + table_configuration: &Self, + new_metadata: Option, + new_protocol: Option, + new_version: Version, + ) -> DeltaResult { + // simplest case: no new P/M, just return the existing table configuration with new version + if new_metadata.is_none() && new_protocol.is_none() { + return Ok(Self { + version: new_version, + ..table_configuration.clone() + }); + } + + // note that while we could pick apart the protocol/metadata updates and validate them + // individually, instead we just re-parse so that we can recycle the try_new validation + // (instead of duplicating it here). + Self::try_new( + new_metadata.unwrap_or_else(|| table_configuration.metadata.clone()), + new_protocol.unwrap_or_else(|| table_configuration.protocol.clone()), + table_configuration.table_root.clone(), + new_version, + ) + } + /// The [`Metadata`] for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn metadata(&self) -> &Metadata { &self.metadata } + /// The [`Protocol`] of this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn protocol(&self) -> &Protocol { &self.protocol } - /// The [`Schema`] of for this table at this version. + + /// The logical schema ([`SchemaRef`]) of this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn schema(&self) -> &Schema { - self.schema.as_ref() + pub(crate) fn schema(&self) -> SchemaRef { + self.schema.clone() } + /// The [`TableProperties`] of this table at this version. - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_properties(&self) -> &TableProperties { &self.table_properties } + /// The [`ColumnMappingMode`] for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn column_mapping_mode(&self) -> ColumnMappingMode { self.column_mapping_mode } + /// The [`Url`] of the table this [`TableConfiguration`] belongs to #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_root(&self) -> &Url { &self.table_root } + /// The [`Version`] which this [`TableConfiguration`] belongs to. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn version(&self) -> Version { self.version } + /// Returns `true` if the kernel supports writing to this table. This checks that the /// protocol's writer features are all supported. 
- #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn is_write_supported(&self) -> bool { - self.protocol.ensure_write_supported().is_ok() + pub(crate) fn ensure_write_supported(&self) -> DeltaResult<()> { + self.protocol.ensure_write_supported()?; + + // for now we don't allow invariants so although we support writer version 2 and the + // ColumnInvariant TableFeature we _must_ check here that they are not actually in use + if self.is_invariants_supported() + && InvariantChecker::has_invariants(self.schema().as_ref()) + { + return Err(Error::unsupported( + "Column invariants are not yet supported", + )); + } + + Ok(()) } + /// Returns `true` if kernel supports reading Change Data Feed on this table. /// See the documentation of [`TableChanges`] for more details. /// /// [`TableChanges`]: crate::table_changes::TableChanges #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn is_cdf_read_supported(&self) -> bool { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([ReaderFeatures::DeletionVectors])); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); let protocol_supported = match self.protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.protocol.min_reader_version() == 3 => { @@ -159,21 +202,22 @@ impl TableConfiguration { ); protocol_supported && cdf_enabled && column_mapping_disabled } + /// Returns `true` if deletion vectors is supported on this table. To support deletion vectors, /// a table must support reader version 3, writer version 7, and the deletionVectors feature in /// both the protocol's readerFeatures and writerFeatures. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { let read_supported = self .protocol() - .has_reader_feature(&ReaderFeatures::DeletionVectors) + .has_reader_feature(&ReaderFeature::DeletionVectors) && self.protocol.min_reader_version() == 3; let write_supported = self .protocol() - .has_writer_feature(&WriterFeatures::DeletionVectors) + .has_writer_feature(&WriterFeature::DeletionVectors) && self.protocol.min_writer_version() == 7; read_supported && write_supported } @@ -183,8 +227,8 @@ impl TableConfiguration { /// table property is set to `true`. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_enabled(&self) -> bool { self.is_deletion_vector_supported() && self @@ -192,6 +236,32 @@ impl TableConfiguration { .enable_deletion_vectors .unwrap_or(false) } + + /// Returns `true` if the table supports the appendOnly table feature. To support this feature: + /// - The table must have a writer version between 2 and 7 (inclusive) + /// - If the table is on writer version 7, it must have the [`WriterFeature::AppendOnly`] + /// writer feature. 
+ pub(crate) fn is_append_only_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeature::AppendOnly) => true, + version => (2..=6).contains(&version), + } + } + + #[allow(unused)] + pub(crate) fn is_append_only_enabled(&self) -> bool { + self.is_append_only_supported() && self.table_properties.append_only.unwrap_or(false) + } + + /// Returns `true` if the table supports the column invariant table feature. + pub(crate) fn is_invariants_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeature::Invariants) => true, + version => (2..=6).contains(&version), + } + } } #[cfg(test)] @@ -201,7 +271,8 @@ mod test { use url::Url; use crate::actions::{Metadata, Protocol}; - use crate::table_features::{ReaderFeatures, WriterFeatures}; + use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_properties::TableProperties; use super::TableConfiguration; @@ -218,8 +289,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -244,8 +315,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -259,16 +330,10 @@ mod test { schema_string: r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#.to_string(), ..Default::default() }; - let protocol = Protocol::try_new( - 3, - 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([WriterFeatures::V2Checkpoint]), - ) - .unwrap(); + let protocol = Protocol::try_new(3, 7, Some(["unknown"]), Some(["unknown"])).unwrap(); let table_root = Url::try_from("file:///").unwrap(); TableConfiguration::try_new(metadata, protocol, table_root, 0) - .expect_err("V2 checkpoint is not supported in kernel"); + .expect_err("Unknown feature is not supported in kernel"); } #[test] fn dv_not_supported() { @@ -283,8 +348,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::TimestampWithoutTimezone]), - Some([WriterFeatures::TimestampWithoutTimezone]), + Some([ReaderFeature::TimestampWithoutTimezone]), + Some([WriterFeature::TimestampWithoutTimezone]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -292,4 +357,78 @@ mod test { assert!(!table_config.is_deletion_vector_supported()); assert!(!table_config.is_deletion_vector_enabled()); } + + #[test] + fn test_try_new_from() { + let schema_string =r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#.to_string(); + let metadata = Metadata { + configuration: HashMap::from_iter([( + "delta.enableChangeDataFeed".to_string(), + "true".to_string(), + )]), + schema_string: schema_string.clone(), + ..Default::default() + }; + let protocol = Protocol::try_new( + 3, + 7, + Some([ReaderFeature::DeletionVectors]), + Some([WriterFeature::DeletionVectors]), + ) + .unwrap(); + let table_root = Url::try_from("file:///").unwrap(); + let table_config = TableConfiguration::try_new(metadata, protocol, table_root, 0).unwrap(); + + let new_metadata = Metadata { + 
configuration: HashMap::from_iter([ + ( + "delta.enableChangeDataFeed".to_string(), + "false".to_string(), + ), + ( + "delta.enableDeletionVectors".to_string(), + "true".to_string(), + ), + ]), + schema_string, + ..Default::default() + }; + let new_protocol = Protocol::try_new( + 3, + 7, + Some([ReaderFeature::DeletionVectors, ReaderFeature::V2Checkpoint]), + Some([ + WriterFeature::DeletionVectors, + WriterFeature::V2Checkpoint, + WriterFeature::AppendOnly, + ]), + ) + .unwrap(); + let new_version = 1; + let new_table_config = TableConfiguration::try_new_from( + &table_config, + Some(new_metadata.clone()), + Some(new_protocol.clone()), + new_version, + ) + .unwrap(); + + assert_eq!(new_table_config.version(), new_version); + assert_eq!(new_table_config.metadata(), &new_metadata); + assert_eq!(new_table_config.protocol(), &new_protocol); + assert_eq!(new_table_config.schema(), table_config.schema()); + assert_eq!( + new_table_config.table_properties(), + &TableProperties { + enable_change_data_feed: Some(false), + enable_deletion_vectors: Some(true), + ..Default::default() + } + ); + assert_eq!( + new_table_config.column_mapping_mode(), + table_config.column_mapping_mode() + ); + assert_eq!(new_table_config.table_root(), table_config.table_root()); + } } diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index 442f742e9..4bd48b8f9 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -1,5 +1,5 @@ //! Code to handle column mapping, including modes and schema transforms -use super::ReaderFeatures; +use super::ReaderFeature; use crate::actions::Protocol; use crate::schema::{ColumnName, DataType, MetadataValue, Schema, SchemaTransform, StructField}; use crate::table_properties::TableProperties; @@ -36,7 +36,7 @@ pub(crate) fn column_mapping_mode( // (but should be ignored) even when the feature is not supported. 
For details see // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#column-mapping (Some(mode), 2) => mode, - (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeatures::ColumnMapping) => mode, + (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeature::ColumnMapping) => mode, _ => ColumnMappingMode::None, } } @@ -201,7 +201,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::ColumnMapping]), + Some([ReaderFeature::ColumnMapping]), empty_features.clone(), ) .unwrap(); @@ -219,7 +219,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeatures::DeletionVectors]), + Some([ReaderFeature::DeletionVectors]), empty_features.clone(), ) .unwrap(); @@ -237,10 +237,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ - ReaderFeatures::DeletionVectors, - ReaderFeatures::ColumnMapping, - ]), + Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), empty_features, ) .unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index ee27fc17e..761dc8402 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -1,8 +1,10 @@ -use std::collections::HashSet; use std::sync::LazyLock; use serde::{Deserialize, Serialize}; -use strum::{AsRefStr, Display as StrumDisplay, EnumString, VariantNames}; +use strum::{AsRefStr, Display as StrumDisplay, EnumCount, EnumString}; + +use crate::actions::schemas::ToDataType; +use crate::schema::DataType; pub(crate) use column_mapping::column_mapping_mode; pub use column_mapping::{validate_schema_column_mapping, ColumnMappingMode}; @@ -24,12 +26,12 @@ mod column_mapping; EnumString, StrumDisplay, AsRefStr, - VariantNames, + EnumCount, Hash, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] -pub enum ReaderFeatures { +pub enum ReaderFeature { /// Mapping of one column to another ColumnMapping, /// Deletion vectors for merge, update, delete @@ -48,6 +50,9 @@ pub enum ReaderFeatures { /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer /// protocol checks during VACUUM operations VacuumProtocolCheck, + #[serde(untagged)] + #[strum(default)] + Unknown(String), } /// Similar to reader features, writer features communicate capabilities that must be implemented @@ -65,12 +70,12 @@ pub enum ReaderFeatures { EnumString, StrumDisplay, AsRefStr, - VariantNames, + EnumCount, Hash, )] #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] -pub enum WriterFeatures { +pub enum WriterFeature { /// Append Only Tables AppendOnly, /// Table invariants @@ -109,65 +114,122 @@ pub enum WriterFeatures { /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer /// protocol checks during VACUUM operations VacuumProtocolCheck, + #[serde(untagged)] + #[strum(default)] + Unknown(String), +} + +impl ToDataType for ReaderFeature { + fn to_data_type() -> DataType { + DataType::STRING + } +} + +impl ToDataType for WriterFeature { + fn to_data_type() -> DataType { + DataType::STRING + } } -impl From for String { - fn from(feature: ReaderFeatures) -> Self { - feature.to_string() +#[cfg(test)] // currently only used in tests +impl ReaderFeature { + pub(crate) fn unknown(s: impl ToString) -> Self { + ReaderFeature::Unknown(s.to_string()) } } -impl From for String { - fn from(feature: WriterFeatures) -> Self { - feature.to_string() +#[cfg(test)] // currently only used in tests +impl WriterFeature { + pub(crate) fn unknown(s: impl 
ToString) -> Self { + WriterFeature::Unknown(s.to_string()) } } -// we support everything except V2 checkpoints -pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| { - HashSet::from([ - ReaderFeatures::ColumnMapping, - ReaderFeatures::DeletionVectors, - ReaderFeatures::TimestampWithoutTimezone, - ReaderFeatures::TypeWidening, - ReaderFeatures::TypeWideningPreview, - ReaderFeatures::VacuumProtocolCheck, - ]) - }); - -// write support wip: no table features are supported yet -pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([])); +pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { + vec![ + ReaderFeature::ColumnMapping, + ReaderFeature::DeletionVectors, + ReaderFeature::TimestampWithoutTimezone, + ReaderFeature::TypeWidening, + ReaderFeature::TypeWideningPreview, + ReaderFeature::VacuumProtocolCheck, + ReaderFeature::V2Checkpoint, + ] +}); + +// note: we 'support' Invariants, but only insofar as we check that they are not present. +// we support writing to tables that have Invariants enabled but not used. similarly, we only +// support DeletionVectors in that we never write them (no DML). +pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { + vec![ + WriterFeature::AppendOnly, + WriterFeature::DeletionVectors, + WriterFeature::Invariants, + ] +}); #[cfg(test)] mod tests { use super::*; + #[test] + fn test_unknown_features() { + let mixed_reader = &[ + ReaderFeature::DeletionVectors, + ReaderFeature::unknown("cool_feature"), + ReaderFeature::ColumnMapping, + ]; + let mixed_writer = &[ + WriterFeature::DeletionVectors, + WriterFeature::unknown("cool_feature"), + WriterFeature::AppendOnly, + ]; + + let reader_string = serde_json::to_string(mixed_reader).unwrap(); + let writer_string = serde_json::to_string(mixed_writer).unwrap(); + + assert_eq!( + &reader_string, + "[\"deletionVectors\",\"cool_feature\",\"columnMapping\"]" + ); + assert_eq!( + &writer_string, + "[\"deletionVectors\",\"cool_feature\",\"appendOnly\"]" + ); + + let typed_reader: Vec = serde_json::from_str(&reader_string).unwrap(); + let typed_writer: Vec = serde_json::from_str(&writer_string).unwrap(); + + assert_eq!(typed_reader.len(), 3); + assert_eq!(&typed_reader, mixed_reader); + assert_eq!(typed_writer.len(), 3); + assert_eq!(&typed_writer, mixed_writer); + } + #[test] fn test_roundtrip_reader_features() { let cases = [ - (ReaderFeatures::ColumnMapping, "columnMapping"), - (ReaderFeatures::DeletionVectors, "deletionVectors"), - (ReaderFeatures::TimestampWithoutTimezone, "timestampNtz"), - (ReaderFeatures::TypeWidening, "typeWidening"), - (ReaderFeatures::TypeWideningPreview, "typeWidening-preview"), - (ReaderFeatures::V2Checkpoint, "v2Checkpoint"), - (ReaderFeatures::VacuumProtocolCheck, "vacuumProtocolCheck"), + (ReaderFeature::ColumnMapping, "columnMapping"), + (ReaderFeature::DeletionVectors, "deletionVectors"), + (ReaderFeature::TimestampWithoutTimezone, "timestampNtz"), + (ReaderFeature::TypeWidening, "typeWidening"), + (ReaderFeature::TypeWideningPreview, "typeWidening-preview"), + (ReaderFeature::V2Checkpoint, "v2Checkpoint"), + (ReaderFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (ReaderFeature::unknown("something"), "something"), ]; - assert_eq!(ReaderFeatures::VARIANTS.len(), cases.len()); - - for ((feature, expected), name) in cases.into_iter().zip(ReaderFeatures::VARIANTS) { - assert_eq!(*name, expected); + assert_eq!(ReaderFeature::COUNT, cases.len()); + for (feature, expected) in cases { 
+ assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{}\"", expected)); - let deserialized: ReaderFeatures = serde_json::from_str(&serialized).unwrap(); + let deserialized: ReaderFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: ReaderFeatures = expected.parse().unwrap(); + let from_str: ReaderFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } @@ -175,37 +237,37 @@ mod tests { #[test] fn test_roundtrip_writer_features() { let cases = [ - (WriterFeatures::AppendOnly, "appendOnly"), - (WriterFeatures::Invariants, "invariants"), - (WriterFeatures::CheckConstraints, "checkConstraints"), - (WriterFeatures::ChangeDataFeed, "changeDataFeed"), - (WriterFeatures::GeneratedColumns, "generatedColumns"), - (WriterFeatures::ColumnMapping, "columnMapping"), - (WriterFeatures::IdentityColumns, "identityColumns"), - (WriterFeatures::DeletionVectors, "deletionVectors"), - (WriterFeatures::RowTracking, "rowTracking"), - (WriterFeatures::TimestampWithoutTimezone, "timestampNtz"), - (WriterFeatures::TypeWidening, "typeWidening"), - (WriterFeatures::TypeWideningPreview, "typeWidening-preview"), - (WriterFeatures::DomainMetadata, "domainMetadata"), - (WriterFeatures::V2Checkpoint, "v2Checkpoint"), - (WriterFeatures::IcebergCompatV1, "icebergCompatV1"), - (WriterFeatures::IcebergCompatV2, "icebergCompatV2"), - (WriterFeatures::VacuumProtocolCheck, "vacuumProtocolCheck"), + (WriterFeature::AppendOnly, "appendOnly"), + (WriterFeature::Invariants, "invariants"), + (WriterFeature::CheckConstraints, "checkConstraints"), + (WriterFeature::ChangeDataFeed, "changeDataFeed"), + (WriterFeature::GeneratedColumns, "generatedColumns"), + (WriterFeature::ColumnMapping, "columnMapping"), + (WriterFeature::IdentityColumns, "identityColumns"), + (WriterFeature::DeletionVectors, "deletionVectors"), + (WriterFeature::RowTracking, "rowTracking"), + (WriterFeature::TimestampWithoutTimezone, "timestampNtz"), + (WriterFeature::TypeWidening, "typeWidening"), + (WriterFeature::TypeWideningPreview, "typeWidening-preview"), + (WriterFeature::DomainMetadata, "domainMetadata"), + (WriterFeature::V2Checkpoint, "v2Checkpoint"), + (WriterFeature::IcebergCompatV1, "icebergCompatV1"), + (WriterFeature::IcebergCompatV2, "icebergCompatV2"), + (WriterFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (WriterFeature::unknown("something"), "something"), ]; - assert_eq!(WriterFeatures::VARIANTS.len(), cases.len()); - - for ((feature, expected), name) in cases.into_iter().zip(WriterFeatures::VARIANTS) { - assert_eq!(*name, expected); + assert_eq!(WriterFeature::COUNT, cases.len()); + for (feature, expected) in cases { + assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{}\"", expected)); - let deserialized: WriterFeatures = serde_json::from_str(&serialized).unwrap(); + let deserialized: WriterFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: WriterFeatures = expected.parse().unwrap(); + let from_str: WriterFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index d74c2456a..2ed73b142 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -78,7 +78,9 @@ impl Transaction { let read_snapshot = snapshot.into(); // important! 
before a read/write to the table we must check it is supported - read_snapshot.protocol().ensure_write_supported()?; + read_snapshot + .table_configuration() + .ensure_write_supported()?; Ok(Transaction { read_snapshot, @@ -110,7 +112,7 @@ impl Transaction { ParsedLogPath::new_commit(self.read_snapshot.table_root(), commit_version)?; // step three: commit the actions as a json file in the log - let json_handler = engine.get_json_handler(); + let json_handler = engine.json_handler(); match json_handler.write_json_file(&commit_path.location, Box::new(actions), false) { Ok(()) => Ok(CommitResult::Committed(commit_version)), Err(Error::FileAlreadyExists(_)) => Ok(CommitResult::Conflict(self, commit_version)), @@ -149,8 +151,9 @@ impl Transaction { // for now, we just pass through all the columns except partition columns. // note this is _incorrect_ if table config deems we need partition columns. let partition_columns = &self.read_snapshot.metadata().partition_columns; - let fields = self.read_snapshot.schema().fields(); - let fields = fields + let schema = self.read_snapshot.schema(); + let fields = schema + .fields() .filter(|f| !partition_columns.contains(f.name())) .map(|f| Expression::column([f.name()])); Expression::struct_from(fields) @@ -165,11 +168,7 @@ impl Transaction { let target_dir = self.read_snapshot.table_root(); let snapshot_schema = self.read_snapshot.schema(); let logical_to_physical = self.generate_logical_to_physical(); - WriteContext::new( - target_dir.clone(), - Arc::new(snapshot_schema.clone()), - logical_to_physical, - ) + WriteContext::new(target_dir.clone(), snapshot_schema, logical_to_physical) } /// Add write metadata about files to include in the transaction. This API can be called @@ -187,7 +186,7 @@ fn generate_adds<'a>( engine: &dyn Engine, write_metadata: impl Iterator + Send + 'a, ) -> impl Iterator>> + Send + 'a { - let expression_handler = engine.get_expression_handler(); + let evaluation_handler = engine.evaluation_handler(); let write_metadata_schema = get_write_metadata_schema(); let log_schema = get_log_add_schema(); @@ -197,7 +196,7 @@ fn generate_adds<'a>( .fields() .map(|f| Expression::column([f.name()])), )]); - let adds_evaluator = expression_handler.get_evaluator( + let adds_evaluator = evaluation_handler.new_expression_evaluator( write_metadata_schema.clone(), adds_expr, log_schema.clone().into(), @@ -321,7 +320,7 @@ fn generate_commit_info( .shift_remove("inCommitTimestamp"); commit_info_field.data_type = DataType::Struct(commit_info_data_type); - let commit_info_evaluator = engine.get_expression_handler().get_evaluator( + let commit_info_evaluator = engine.evaluation_handler().new_expression_evaluator( engine_commit_info_schema.into(), commit_info_expr, commit_info_empty_struct_schema.into(), @@ -335,52 +334,51 @@ mod tests { use super::*; use crate::engine::arrow_data::ArrowEngineData; - use crate::engine::arrow_expression::ArrowExpressionHandler; + use crate::engine::arrow_expression::ArrowEvaluationHandler; use crate::schema::MapType; - use crate::{ExpressionHandler, FileSystemClient, JsonHandler, ParquetHandler}; + use crate::{EvaluationHandler, JsonHandler, ParquetHandler, StorageHandler}; - use arrow::json::writer::LineDelimitedWriter; - use arrow::record_batch::RecordBatch; - use arrow_array::builder::StringBuilder; - use arrow_schema::Schema as ArrowSchema; - use arrow_schema::{DataType as ArrowDataType, Field}; + use crate::arrow::array::{MapArray, MapBuilder, MapFieldNames, StringArray, StringBuilder}; + use 
crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use crate::arrow::error::ArrowError; + use crate::arrow::json::writer::LineDelimitedWriter; + use crate::arrow::record_batch::RecordBatch; - struct ExprEngine(Arc); + struct ExprEngine(Arc); impl ExprEngine { fn new() -> Self { - ExprEngine(Arc::new(ArrowExpressionHandler)) + ExprEngine(Arc::new(ArrowEvaluationHandler)) } } impl Engine for ExprEngine { - fn get_expression_handler(&self) -> Arc { + fn evaluation_handler(&self) -> Arc { self.0.clone() } - fn get_json_handler(&self) -> Arc { + fn json_handler(&self) -> Arc { unimplemented!() } - fn get_parquet_handler(&self) -> Arc { + fn parquet_handler(&self) -> Arc { unimplemented!() } - fn get_file_system_client(&self) -> Arc { + fn storage_handler(&self) -> Arc { unimplemented!() } } - fn build_map(entries: Vec<(&str, &str)>) -> arrow_array::MapArray { + fn build_map(entries: Vec<(&str, &str)>) -> MapArray { let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); for (key, val) in entries { builder.keys().append_value(key); builder.values().append_value(val); @@ -494,7 +492,7 @@ mod tests { engine_commit_info_schema, vec![ Arc::new(map_array), - Arc::new(arrow_array::StringArray::from(vec!["some_string"])), + Arc::new(StringArray::from(vec!["some_string"])), ], )?; @@ -533,7 +531,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -542,12 +540,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) => (), + Error::Arrow(ArrowError::SchemaError(_)) => (), Error::Backtraced { source, .. } - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::SchemaError(_))) => {} _ => panic!("expected arrow schema error error, got {:?}", e), }); @@ -564,7 +559,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -573,12 +568,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) => (), + Error::Arrow(ArrowError::InvalidArgumentError(_)) => (), Error::Backtraced { source, .. 
} - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::InvalidArgumentError(_))) => {} _ => panic!("expected arrow invalid arg error, got {:?}", e), }); @@ -644,16 +636,16 @@ mod tests { ), true, )])); - use arrow_array::builder::StringBuilder; + let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = crate::arrow::array::MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + crate::arrow::array::MapBuilder::new(Some(names), key_builder, val_builder); builder.append(is_null).unwrap(); let array = builder.finish(); diff --git a/kernel/src/utils.rs b/kernel/src/utils.rs index 8f4fcf818..27e5bc3cf 100644 --- a/kernel/src/utils.rs +++ b/kernel/src/utils.rs @@ -13,6 +13,12 @@ pub(crate) use require; #[cfg(test)] pub(crate) mod test_utils { + use crate::actions::get_log_schema; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use crate::engine::sync::SyncEngine; + use crate::Engine; + use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::ObjectStore; @@ -21,7 +27,11 @@ pub(crate) mod test_utils { use tempfile::TempDir; use test_utils::delta_path_for_version; - use crate::actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}; + use crate::{ + actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}, + engine::arrow_data::ArrowEngineData, + EngineData, + }; #[derive(Serialize)] pub(crate) enum Action { @@ -73,9 +83,55 @@ pub(crate) mod test_utils { .await .expect("put log file in store"); } + /// Get the path to the root of the table. pub(crate) fn table_root(&self) -> &Path { self.dir.path() } } + + /// Try to convert an `EngineData` into a `RecordBatch`. 
Panics if not using `ArrowEngineData` from + /// the default module + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } + + /// Checks that two `EngineData` objects are equal by converting them to `RecordBatch` and comparing + pub(crate) fn assert_batch_matches(actual: Box, expected: Box) { + assert_eq!(into_record_batch(actual), into_record_batch(expected)); + } + + pub(crate) fn string_array_to_engine_data(string_array: StringArray) -> Box { + let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); + let schema = Arc::new(ArrowSchema::new(vec![string_field])); + let batch = RecordBatch::try_new(schema, vec![Arc::new(string_array)]) + .expect("Can't convert to record batch"); + Box::new(ArrowEngineData::new(batch)) + } + + pub(crate) fn parse_json_batch(json_strings: StringArray) -> Box { + let engine = SyncEngine::new(); + let json_handler = engine.json_handler(); + let output_schema = get_log_schema().clone(); + json_handler + .parse_json(string_array_to_engine_data(json_strings), output_schema) + .unwrap() + } + + pub(crate) fn action_batch() -> Box { + let json_strings: StringArray = vec![ + r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, + r#"{"remove":{"path":"part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"#, + r#"{"commitInfo":{"timestamp":1677811178585,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"635"},"engineInfo":"Databricks-Runtime/","txnId":"a6a94671-55ef-450e-9546-b8465b9147de"}}"#, + r#"{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}}"#, + r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none", "delta.enableChangeDataFeed":"true"},"createdTime":1677811175819}}"#, + r#"{"cdc":{"path":"_change_data/age=21/cdc-00000-93f7fceb-281a-446a-b221-07b88132d203.c000.snappy.parquet","partitionValues":{"age":"21"},"size":1033,"dataChange":false}}"#, + r#"{"sidecar":{"path":"016ae953-37a9-438e-8683-9a9a4a79a395.parquet","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{"tag_foo":"tag_bar"}}}"#, + r#"{"txn":{"appId":"myApp","version": 3}}"#, + ] + .into(); + parse_json_batch(json_strings) + } } diff --git a/kernel/tests/cdf.rs b/kernel/tests/cdf.rs index 2560dc71d..069018951 100644 --- a/kernel/tests/cdf.rs +++ b/kernel/tests/cdf.rs @@ -1,7 +1,7 @@ use std::{error, sync::Arc}; -use arrow::compute::filter_record_batch; -use arrow_array::RecordBatch; +use delta_kernel::arrow::array::RecordBatch; +use 
delta_kernel::arrow::compute::filter_record_batch; use delta_kernel::engine::sync::SyncEngine; use itertools::Itertools; diff --git a/kernel/tests/common/mod.rs b/kernel/tests/common/mod.rs index a918695b7..4268f0626 100644 --- a/kernel/tests/common/mod.rs +++ b/kernel/tests/common/mod.rs @@ -1,6 +1,6 @@ -use arrow::compute::filter_record_batch; -use arrow::record_batch::RecordBatch; -use arrow::util::pretty::pretty_format_batches; +use delta_kernel::arrow::compute::filter_record_batch; +use delta_kernel::arrow::record_batch::RecordBatch; +use delta_kernel::arrow::util::pretty::pretty_format_batches; use itertools::Itertools; use crate::ArrowEngineData; @@ -24,7 +24,7 @@ macro_rules! sort_lines { #[macro_export] macro_rules! assert_batches_sorted_eq { ($expected_lines_sorted: expr, $CHUNKS: expr) => { - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) + let formatted = delta_kernel::arrow::util::pretty::pretty_format_batches($CHUNKS) .unwrap() .to_string(); // fix for windows: \r\n --> diff --git a/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst new file mode 100644 index 000000000..dbb8aa627 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst new file mode 100644 index 000000000..a31194638 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst new file mode 100644 index 000000000..aaba3d3f8 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst new file mode 100644 index 000000000..4f6833a5a Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst new file mode 100644 index 000000000..0f2a289b1 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst new file mode 100644 index 000000000..b90fc04b4 Binary files /dev/null and b/kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst differ diff --git a/kernel/tests/data/v2-classic-checkpoint-json.tar.zst b/kernel/tests/data/v2-classic-checkpoint-json.tar.zst new file mode 100644 index 000000000..c695339cd Binary files /dev/null and b/kernel/tests/data/v2-classic-checkpoint-json.tar.zst differ diff --git a/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst b/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst new file mode 100644 index 000000000..87bca6f59 Binary files /dev/null and b/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst differ diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs index 120271ef2..241279906 100644 --- a/kernel/tests/golden_tables.rs +++ b/kernel/tests/golden_tables.rs @@ -3,23 +3,23 @@ //! 
Data (golden tables) are stored in tests/golden_data/.tar.zst //! Each table directory has a table/ and expected/ subdirectory with the input/output respectively -use arrow::array::AsArray; -use arrow::{compute::filter_record_batch, record_batch::RecordBatch}; -use arrow_ord::sort::{lexsort_to_indices, SortColumn}; -use arrow_schema::{FieldRef, Schema}; -use arrow_select::{concat::concat_batches, take::take}; +use delta_kernel::arrow::array::{Array, AsArray, StructArray}; +use delta_kernel::arrow::compute::{concat_batches, take}; +use delta_kernel::arrow::compute::{lexsort_to_indices, SortColumn}; +use delta_kernel::arrow::datatypes::{DataType, FieldRef, Schema}; +use delta_kernel::arrow::{compute::filter_record_batch, record_batch::RecordBatch}; use itertools::Itertools; use paste::paste; use std::path::{Path, PathBuf}; use std::sync::Arc; +use delta_kernel::parquet::arrow::async_reader::{ + ParquetObjectReader, ParquetRecordBatchStreamBuilder, +}; use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Table}; use futures::{stream::TryStreamExt, StreamExt}; use object_store::{local::LocalFileSystem, ObjectStore}; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; -use arrow_array::{Array, StructArray}; -use arrow_schema::DataType; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; @@ -273,8 +273,8 @@ async fn canonicalized_paths_test( .into_scan_builder() .build() .expect("build the scan"); - let mut scan_data = scan.scan_data(&engine).expect("scan data"); - assert!(scan_data.next().is_none()); + let mut scan_metadata = scan.scan_metadata(&engine).expect("scan metadata"); + assert!(scan_metadata.next().is_none()); Ok(()) } @@ -289,9 +289,12 @@ async fn checkpoint_test( .into_scan_builder() .build() .expect("build the scan"); - let scan_data: Vec<_> = scan.scan_data(&engine).expect("scan data").collect(); + let scan_metadata: Vec<_> = scan + .scan_metadata(&engine) + .expect("scan metadata") + .collect(); assert_eq!(version, 14); - assert!(scan_data.len() == 1); + assert!(scan_metadata.len() == 1); Ok(()) } @@ -408,9 +411,8 @@ golden_test!("time-travel-schema-changes-b", latest_snapshot_test); golden_test!("time-travel-start", latest_snapshot_test); golden_test!("time-travel-start-start20", latest_snapshot_test); golden_test!("time-travel-start-start20-start40", latest_snapshot_test); - -skip_test!("v2-checkpoint-json": "v2 checkpoint not supported"); -skip_test!("v2-checkpoint-parquet": "v2 checkpoint not supported"); +golden_test!("v2-checkpoint-json", latest_snapshot_test); +golden_test!("v2-checkpoint-parquet", latest_snapshot_test); // BUG: // - AddFile: 'file:/some/unqualified/absolute/path' diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 9d5d24314..2247240ff 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -3,22 +3,23 @@ use std::ops::Not; use std::path::PathBuf; use std::sync::Arc; -use arrow::compute::filter_record_batch; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use arrow_select::concat::concat_batches; use delta_kernel::actions::deletion_vector::split_vector; +use delta_kernel::arrow::compute::{concat_batches, filter_record_batch}; +use delta_kernel::arrow::datatypes::SchemaRef as ArrowSchemaRef; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; use 
delta_kernel::expressions::{column_expr, BinaryOperator, Expression, ExpressionRef}; -use delta_kernel::scan::state::{transform_to_logical, visit_scan_files, DvInfo, Stats}; +use delta_kernel::parquet::file::properties::{EnabledStatistics, WriterProperties}; +use delta_kernel::scan::state::{transform_to_logical, DvInfo, Stats}; use delta_kernel::scan::Scan; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::{Engine, FileMeta, Table}; +use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use test_utils::{ actions_to_string, add_commit, generate_batch, generate_simple_batch, into_record_batch, - record_batch_to_bytes, IntoArray, TestAction, METADATA, + record_batch_to_bytes, record_batch_to_bytes_with_props, IntoArray, TestAction, METADATA, }; use url::Url; @@ -58,7 +59,6 @@ async fn single_commit_two_add_files() -> Result<(), Box> let location = Url::parse("memory:///")?; let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), )); @@ -113,11 +113,7 @@ async fn two_commits() -> Result<(), Box> { .await?; let location = Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch.clone(), batch]; @@ -171,11 +167,7 @@ async fn remove_action() -> Result<(), Box> { .await?; let location = Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch]; @@ -249,7 +241,6 @@ async fn stats() -> Result<(), Box> { let location = Url::parse("memory:///").unwrap(); let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from(""), Arc::new(TokioBackgroundExecutor::new()), )); @@ -342,7 +333,7 @@ struct ScanFile { transform: Option, } -fn scan_data_callback( +fn scan_metadata_callback( batches: &mut Vec, path: &str, size: i64, @@ -359,7 +350,7 @@ fn scan_data_callback( }); } -fn read_with_scan_data( +fn read_with_scan_metadata( location: &Url, engine: &dyn Engine, scan: &Scan, @@ -367,17 +358,11 @@ fn read_with_scan_data( ) -> Result<(), Box> { let global_state = scan.global_scan_state(); let result_schema: ArrowSchemaRef = Arc::new(scan.schema().as_ref().try_into()?); - let scan_data = scan.scan_data(engine)?; + let scan_metadata = scan.scan_metadata(engine)?; let mut scan_files = vec![]; - for data in scan_data { - let (data, vec, transforms) = data?; - scan_files = visit_scan_files( - data.as_ref(), - &vec, - &transforms, - scan_files, - scan_data_callback, - )?; + for res in scan_metadata { + let scan_metadata = res?; + scan_files = scan_metadata.visit_scan_files(scan_files, scan_metadata_callback)?; } let mut batches = vec![]; @@ -393,7 +378,7 @@ fn read_with_scan_data( location: file_path, }; let read_results = engine - .get_parquet_handler() + .parquet_handler() .read_parquet_files( &[meta], global_state.physical_schema.clone(), @@ -471,7 +456,7 @@ fn read_table_data( .build()?; sort_lines!(expected); - read_with_scan_data(table.location(), engine.as_ref(), &scan, &expected)?; + read_with_scan_metadata(table.location(), engine.as_ref(), &scan, &expected)?; 
read_with_execute(engine, &scan, &expected)?; } Ok(()) } @@ -576,6 +561,26 @@ fn table_for_numbers(nums: Vec) -> Vec { res } +// get the basic_partitioned table for a set of expected letters +fn table_for_letters(letters: &[char]) -> Vec<String> { + let mut res: Vec<String> = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + let rows = vec![(1, 'a'), (2, 'b'), (3, 'c'), (4, 'a'), (5, 'e')]; + for (num, letter) in rows { + if letters.contains(&letter) { + res.push(format!("| {letter} | {num} |")); + } + } + res.push("+--------+--------+".to_string()); + res +} + #[test] fn predicate_on_number() -> Result<(), Box> { let cases = vec![ @@ -613,6 +618,118 @@ fn predicate_on_number() -> Result<(), Box> { Ok(()) } +#[test] +fn predicate_on_letter() -> Result<(), Box<dyn std::error::Error>> { + // Test basic column pruning. Note that the actual expression machinery is already well-tested, + // so we're just testing wiring here. + let null_row_table: Vec<String> = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + (column_expr!("letter").is_null(), null_row_table), + ( + column_expr!("letter").is_not_null(), + table_for_letters(&['a', 'b', 'c', 'e']), + ), + ( + column_expr!("letter").lt("c"), + table_for_letters(&['a', 'b']), + ), + ( + column_expr!("letter").le("c"), + table_for_letters(&['a', 'b', 'c']), + ), + (column_expr!("letter").gt("c"), table_for_letters(&['e'])), + ( + column_expr!("letter").ge("c"), + table_for_letters(&['c', 'e']), + ), + (column_expr!("letter").eq("c"), table_for_letters(&['c'])), + ( + column_expr!("letter").ne("c"), + table_for_letters(&['a', 'b', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + +#[test] +fn predicate_on_letter_and_number() -> Result<(), Box<dyn std::error::Error>> { + // Partition skipping and file skipping are currently implemented separately. Mixing them in an + // AND clause will evaluate each separately, but mixing them in an OR clause disables both.
+ let full_table: Vec = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "| a | 1 |", + "| a | 4 |", + "| b | 2 |", + "| c | 3 |", + "| e | 5 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + ( + Expression::or( + // No pruning power + column_expr!("letter").gt("a"), + column_expr!("number").gt(3i64), + ), + full_table, + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + column_expr!("number").gt(3i64), // letters a, e + ), + table_for_letters(&['e']), + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + Expression::or( + // No pruning power + column_expr!("letter").eq("c"), + column_expr!("number").eq(3i64), + ), + ), + table_for_letters(&['b', 'c', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + #[test] fn predicate_on_number_not() -> Result<(), Box> { let cases = vec![ @@ -906,6 +1023,126 @@ fn with_predicate_and_removes() -> Result<(), Box> { Ok(()) } +#[tokio::test] +async fn predicate_on_non_nullable_partition_column() -> Result<(), Box> { + // Test for https://github.com/delta-io/delta-kernel-rs/issues/698 + let batch = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"id\"]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + format!(r#"{{"add":{{"path":"id=1/{PARQUET_FILE1}","partitionValues":{{"id":"1"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + format!(r#"{{"add":{{"path":"id=2/{PARQUET_FILE2}","partitionValues":{{"id":"2"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + ]; + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from("id=1").child(PARQUET_FILE1), + record_batch_to_bytes(&batch).into(), + ) + .await?; + storage + .put( + &Path::from("id=2").child(PARQUET_FILE2), + record_batch_to_bytes(&batch).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = Arc::new(DefaultEngine::new( + storage.clone(), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("id"), 2); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let mut result_batch = 
into_record_batch(engine_data?.raw_data?); + let _ = result_batch.remove_column(result_batch.schema().index_of("id")?); + assert_eq!(&batch, &result_batch); + files_scanned += 1; + } + assert_eq!(1, files_scanned); + Ok(()) +} + +#[tokio::test] +async fn predicate_on_non_nullable_column_missing_stats() -> Result<(), Box> +{ + let batch_1 = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + let batch_2 = generate_batch(vec![("val", vec!["d", "e", "f"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + // Add one file with stats, one file without + format!(r#"{{"add":{{"path":"{PARQUET_FILE1}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + format!(r#"{{"add":{{"path":"{PARQUET_FILE2}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{}},\"minValues\":{{}},\"maxValues\":{{}}}}"}}}}"#), + ]; + + // Disable writing Parquet statistics so these cannot be used for pruning row groups + let writer_props = WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::None) + .build(); + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from(PARQUET_FILE1), + record_batch_to_bytes_with_props(&batch_1, writer_props.clone()).into(), + ) + .await?; + storage + .put( + &Path::from(PARQUET_FILE2), + record_batch_to_bytes_with_props(&batch_2, writer_props).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = Arc::new(DefaultEngine::new( + storage.clone(), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("val"), "g"); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let result_batch = into_record_batch(engine_data?.raw_data?); + assert_eq!(&batch_2, &result_batch); + files_scanned += 1; + } + // One file is scanned as stats are missing so we don't know the predicate isn't satisfied + assert_eq!(1, files_scanned); + + Ok(()) +} + #[test] fn short_dv() -> Result<(), Box> { let expected = vec![ diff --git a/kernel/tests/v2_checkpoints.rs b/kernel/tests/v2_checkpoints.rs new file mode 100644 index 000000000..4384ed6e0 --- /dev/null +++ b/kernel/tests/v2_checkpoints.rs @@ -0,0 +1,224 @@ +use std::sync::Arc; + +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::engine::sync::SyncEngine; + +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::{DeltaResult, Table}; + +mod common; +use common::{load_test_data, read_scan}; +use itertools::Itertools; + +fn 
read_v2_checkpoint_table(test_name: impl AsRef) -> DeltaResult> { + let test_dir = load_test_data("tests/data", test_name.as_ref()).unwrap(); + let test_path = test_dir.path().join(test_name.as_ref()); + + let table = Table::try_from_uri(test_path.to_str().expect("table path to string")).unwrap(); + let engine = Arc::new(SyncEngine::new()); + let snapshot = table.snapshot(engine.as_ref(), None)?; + let scan = snapshot.into_scan_builder().build()?; + let batches = read_scan(&scan, engine)?; + + Ok(batches) +} + +fn test_v2_checkpoint_with_table( + table_name: &str, + mut expected_table: Vec, +) -> DeltaResult<()> { + let batches = read_v2_checkpoint_table(table_name)?; + + sort_lines!(expected_table); + assert_batches_sorted_eq!(expected_table, &batches); + Ok(()) +} + +/// Helper function to convert string slice vectors to String vectors +fn to_string_vec(string_slice_vec: Vec<&str>) -> Vec { + string_slice_vec + .into_iter() + .map(|s| s.to_string()) + .collect() +} + +fn generate_sidecar_expected_data() -> Vec { + let header = vec![ + "+-----+".to_string(), + "| id |".to_string(), + "+-----+".to_string(), + ]; + + // Generate rows for different ranges + let generate_rows = |count: usize| -> Vec { + (0..count) + .map(|id| format!("| {: Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_classic_checkpoint_table() -> Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 10 |", + "| 11 |", + "| 12 |", + "| 13 |", + "| 14 |", + "| 15 |", + "| 16 |", + "| 17 |", + "| 18 |", + "| 19 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_without_sidecars_table() -> Vec { + to_string_vec(vec![ + "+------+", + "| id |", + "+------+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 2718 |", + "+------+", + ]) +} + +/// The test cases below are derived from delta-spark's `CheckpointSuite`. +/// +/// These tests are converted from delta-spark using the following process: +/// 1. Specific test cases of interest in `delta-spark` were modified to persist their generated tables +/// 2. These tables were compressed into `.tar.zst` archives and copied to delta-kernel-rs +/// 3. 
Each test loads a stored table, scans it, and asserts that the returned table state +/// matches the expected state derived from the corresponding table insertions in `delta-spark` +/// +/// The following is the ported list of `delta-spark` tests -> `delta-kernel-rs` tests: +/// +/// - `multipart v2 checkpoint` -> `v2_checkpoints_json_with_sidecars` +/// - `multipart v2 checkpoint` -> `v2_checkpoints_parquet_with_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_json_without_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_parquet_without_sidecars` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_json` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_parquet` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_json_with_last_checkpoint` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_parquet_with_last_checkpoint` +#[test] +fn v2_checkpoints_json_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_json_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-without-sidecars", + get_without_sidecars_table(), + ) +} + +#[test] +fn v2_checkpoints_parquet_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-without-sidecars", + get_without_sidecars_table(), + ) +} + +#[test] +fn v2_classic_checkpoint_json() -> DeltaResult<()> { + test_v2_checkpoint_with_table("v2-classic-checkpoint-json", get_classic_checkpoint_table()) +} + +#[test] +fn v2_classic_checkpoint_parquet() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-classic-checkpoint-parquet", + get_classic_checkpoint_table(), + ) +} + +#[test] +fn v2_checkpoints_json_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-last-checkpoint", + get_simple_id_table(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-last-checkpoint", + get_simple_id_table(), + ) +} diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index 2ee6dfdd5..eb3671595 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -1,10 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow::array::{Int32Array, StringArray}; -use arrow::record_batch::RecordBatch; -use arrow_schema::Schema as ArrowSchema; -use arrow_schema::{DataType as ArrowDataType, Field}; +use delta_kernel::arrow::array::{ + Int32Array, MapBuilder, MapFieldNames, StringArray, StringBuilder, +}; +use delta_kernel::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; +use delta_kernel::arrow::error::ArrowError; +use delta_kernel::arrow::record_batch::RecordBatch; use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::memory::InMemory; @@ -46,7 +48,7 @@ fn setup( let table_root_path = Path::from(format!("{base_path}{table_name}")); let url = Url::parse(&format!("{base_url}{table_root_path}/")).unwrap(); let executor = Arc::new(TokioBackgroundExecutor::new()); - 
let engine = DefaultEngine::new(Arc::clone(&storage), table_root_path, executor); + let engine = DefaultEngine::new(Arc::clone(&storage), executor); (storage, engine, url) } @@ -58,18 +60,28 @@ async fn create_table( table_path: Url, schema: SchemaRef, partition_columns: &[&str], + use_37_protocol: bool, ) -> Result> { let table_id = "test_id"; let schema = serde_json::to_string(&schema)?; - let protocol = json!({ - "protocol": { - "minReaderVersion": 3, - "minWriterVersion": 7, - "readerFeatures": [], - "writerFeatures": [] - } - }); + let protocol = if use_37_protocol { + json!({ + "protocol": { + "minReaderVersion": 3, + "minWriterVersion": 7, + "readerFeatures": [], + "writerFeatures": [] + } + }) + } else { + json!({ + "protocol": { + "minReaderVersion": 1, + "minWriterVersion": 1, + } + }) + }; let metadata = json!({ "metaData": { "id": table_id, @@ -120,15 +132,14 @@ fn new_commit_info() -> DeltaResult> { false, )])); - use arrow_array::builder::StringBuilder; let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); builder.keys().append_value("engineInfo"); builder.values().append_value("default engine"); builder.append(true).unwrap(); @@ -139,56 +150,99 @@ fn new_commit_info() -> DeltaResult> { Ok(Box::new(ArrowEngineData::new(commit_info_batch))) } +async fn setup_tables( + schema: SchemaRef, + partition_columns: &[&str], +) -> Result< + Vec<( + Table, + DefaultEngine, + Arc, + &'static str, + )>, + Box, +> { + let (store_37, engine_37, table_location_37) = setup("test_table_37", true); + let (store_11, engine_11, table_location_11) = setup("test_table_11", true); + Ok(vec![ + ( + create_table( + store_37.clone(), + table_location_37, + schema.clone(), + partition_columns, + true, + ) + .await?, + engine_37, + store_37, + "test_table_37", + ), + ( + create_table( + store_11.clone(), + table_location_11, + schema, + partition_columns, + false, + ) + .await?, + engine_11, + store_11, + "test_table_11", + ), + ]) +} + #[tokio::test] async fn test_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - let commit_info = new_commit_info()?; - - // create a transaction - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); - - // commit! 
- txn.commit(&engine)?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; - *parsed_commit - .get_mut("commitInfo") - .unwrap() - .get_mut("timestamp") - .unwrap() = serde_json::Value::Number(0.into()); - - let expected_commit = json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + + for (table, engine, store, table_name) in setup_tables(schema, &[]).await? { + let commit_info = new_commit_info()?; + + // create a transaction + let txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // commit! + txn.commit(&engine)?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; + *parsed_commit + .get_mut("commitInfo") + .unwrap() + .get_mut("timestamp") + .unwrap() = serde_json::Value::Number(0.into()); + + let expected_commit = json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }); + }); - assert_eq!(parsed_commit, expected_commit); + assert_eq!(parsed_commit, expected_commit); + } Ok(()) } @@ -196,21 +250,18 @@ async fn test_commit_info() -> Result<(), Box> { async fn test_empty_commit() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - assert!(matches!( - table.new_transaction(&engine)?.commit(&engine).unwrap_err(), - KernelError::MissingCommitInfo - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + assert!(matches!( + table.new_transaction(&engine)?.commit(&engine).unwrap_err(), + KernelError::MissingCommitInfo + )); + } Ok(()) } @@ -218,53 +269,51 @@ async fn test_empty_commit() -> Result<(), Box> { async fn test_invalid_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - // empty commit info test - let commit_info_schema = Arc::new(ArrowSchema::empty()); - let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); - assert!(commit_info_batch.num_rows() == 0); - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! 
- assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); - - // two-row commit info test - let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( - "engineInfo", - ArrowDataType::Utf8, - true, - )])); - let commit_info_batch = RecordBatch::try_new( - commit_info_schema.clone(), - vec![Arc::new(StringArray::from(vec![ - "row1: default engine", - "row2: default engine", - ]))], - )?; - - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! - assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + // empty commit info test + let commit_info_schema = Arc::new(ArrowSchema::empty()); + let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); + assert!(commit_info_batch.num_rows() == 0); + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + + // two-row commit info test + let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "engineInfo", + ArrowDataType::Utf8, + true, + )])); + let commit_info_batch = RecordBatch::try_new( + commit_info_schema.clone(), + vec![Arc::new(StringArray::from(vec![ + "row1: default engine", + "row2: default engine", + ]))], + )?; + + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + } Ok(()) } @@ -329,125 +378,123 @@ async fn get_and_check_all_parquet_sizes(store: Arc, path: &str async fn test_append() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, store, table_name) in setup_tables(schema.clone(), &[]).await? { + let commit_info = new_commit_info()?; - let mut txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); + let mut txn = table + .new_transaction(&engine)? 
+ .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - // commit! - txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) - .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
+ .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(vec![ - 1, 2, 3, 4, 5, 6, - ]))], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": "second.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, expected_commit); + + test_read( + &ArrowEngineData::new(RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]))], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -455,8 +502,7 @@ async fn test_append() -> Result<(), Box> { async fn test_append_partitioned() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); + let partition_col = "partition"; // create a simple partitioned table: one int column named 'number', partitioned by string @@ -469,132 +515,131 @@ async fn test_append_partitioned() -> Result<(), Box> { "number", DataType::INTEGER, )])); - let table = create_table( - store.clone(), - table_location, - table_schema.clone(), - &[partition_col], - ) - .await?; - - let commit_info = new_commit_info()?; - - let mut txn = table - .new_transaction(&engine)? 
- .with_commit_info(commit_info); - - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); - let partition_vals = vec!["a", "b"]; - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data - .into_iter() - .zip(partition_vals) - .map(|(data, partition_val)| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::from([(partition_col.to_string(), partition_val.to_string())]), - true, - ) - .await - }) + for (table, engine, store, table_name) in + setup_tables(table_schema.clone(), &[partition_col]).await? + { + let commit_info = new_commit_info()?; + + let mut txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) }); + let partition_vals = vec!["a", "b"]; + + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data + .into_iter() + .zip(partition_vals) + .map(|(data, partition_val)| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::from([(partition_col.to_string(), partition_val.to_string())]), + true, + ) + .await + }) + }); + + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } - - // commit! - txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
- .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) + .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": { - "partition": "a" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": { - "partition": "b" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(table_schema.as_ref().try_into()?), - vec![ - Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), - Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), - ], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": { + "partition": "a" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": "second.parquet", + "partitionValues": { + "partition": "b" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, expected_commit); + + test_read( + 
&ArrowEngineData::new(RecordBatch::try_new( + Arc::new(table_schema.as_ref().try_into()?), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), + Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), + ], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -602,9 +647,6 @@ async fn test_append_partitioned() -> Result<(), Box> { async fn test_append_invalid_schema() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let table_schema = Arc::new(StructType::new(vec![StructField::nullable( "number", @@ -615,52 +657,50 @@ async fn test_append_invalid_schema() -> Result<(), Box> "string", DataType::STRING, )])); - let table = create_table(store.clone(), table_location, table_schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, _store, _table_name) in setup_tables(table_schema, &[]).await? { + let commit_info = new_commit_info()?; - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); + let txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::StringArray::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(StringArray::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - assert!(write_metadata.all(|res| match res { - Err(KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_))) => true, - Err(KernelError::Backtraced { source, .. }) - if matches!( - &*source, - KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => - true, - _ => false, - })); + let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + assert!(write_metadata.all(|res| match res { + Err(KernelError::Arrow(ArrowError::SchemaError(_))) => true, + Err(KernelError::Backtraced { source, .. 
diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml
index 0a90e96ed..20df4a524 100644
--- a/test-utils/Cargo.toml
+++ b/test-utils/Cargo.toml
@@ -12,9 +12,6 @@ version.workspace = true
 release = false
 
 [dependencies]
-arrow-array = { workspace = true, features = ["chrono-tz"] }
-arrow-schema = { workspace = true }
-delta_kernel = { path = "../kernel", features = [ "default-engine" ] }
+delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] }
 itertools = "0.13.0"
 object_store = { workspace = true }
-parquet = { workspace = true }
diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs
index 2605bea56..a8b7c6610 100644
--- a/test-utils/src/lib.rs
+++ b/test-utils/src/lib.rs
@@ -2,44 +2,68 @@
 
 use std::sync::Arc;
 
-use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray};
-use arrow_schema::ArrowError;
+use delta_kernel::arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray};
+use delta_kernel::arrow::error::ArrowError;
 use delta_kernel::engine::arrow_data::ArrowEngineData;
+use delta_kernel::parquet::arrow::arrow_writer::ArrowWriter;
+use delta_kernel::parquet::file::properties::WriterProperties;
 use delta_kernel::EngineData;
 use itertools::Itertools;
 use object_store::{path::Path, ObjectStore};
-use parquet::arrow::arrow_writer::ArrowWriter;
-use parquet::file::properties::WriterProperties;
 
 /// A common useful initial metadata and protocol. Also includes a single commitInfo
 pub const METADATA: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}
 {"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
 {"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#;
 
+/// Same as [`METADATA`], but the table is partitioned by the `val` column. Also includes a single commitInfo
+pub const METADATA_WITH_PARTITION_COLS: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}
+{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
+{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["val"],"configuration":{},"createdTime":1587968585495}}"#;
+
 pub enum TestAction {
     Add(String),
     Remove(String),
     Metadata,
 }
 
-/// Convert a vector of actions into a newline delimited json string
+// TODO: We need a better way to mock tables :)
+
+/// Convert a vector of actions into a newline delimited json string, with standard metadata
 pub fn actions_to_string(actions: Vec<TestAction>) -> String {
+    actions_to_string_with_metadata(actions, METADATA)
+}
+
+/// Convert a vector of actions into a newline delimited json string, with metadata including a partition column
+pub fn actions_to_string_partitioned(actions: Vec<TestAction>) -> String {
+    actions_to_string_with_metadata(actions, METADATA_WITH_PARTITION_COLS)
+}
+
+fn actions_to_string_with_metadata(actions: Vec<TestAction>, metadata: &str) -> String {
     actions
-        .into_iter()
-        .map(|test_action| match test_action {
-            TestAction::Add(path) => format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#),
-            TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#),
-            TestAction::Metadata => METADATA.into(),
-        })
-        .join("\n")
+        .into_iter()
+        .map(|test_action| match test_action {
+            TestAction::Add(path) => format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#),
+            TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#),
+            TestAction::Metadata => metadata.into(),
+        })
+        .join("\n")
 }
 
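// [Editor's note] Illustrative only, not part of this patch: typical usage of the helpers
// above when mocking a table. The function name and parquet file name are hypothetical.
fn example_first_commit() -> String {
    // protocol + metadata + a single data file, as newline-delimited JSON; the result
    // would be written as `_delta_log/00000000000000000000.json` of the mocked table
    // (use `actions_to_string_partitioned` for the partitioned variant)
    actions_to_string(vec![
        TestAction::Metadata,
        TestAction::Add("part-00000.parquet".to_string()),
    ])
}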
 /// convert a RecordBatch into a vector of bytes. We can't use `From` since these are both foreign
 /// types
 pub fn record_batch_to_bytes(batch: &RecordBatch) -> Vec<u8> {
-    let mut data: Vec<u8> = Vec::new();
     let props = WriterProperties::builder().build();
-    let mut writer = ArrowWriter::try_new(&mut data, batch.schema(), Some(props)).unwrap();
+    record_batch_to_bytes_with_props(batch, props)
+}
+
+pub fn record_batch_to_bytes_with_props(
+    batch: &RecordBatch,
+    writer_properties: WriterProperties,
+) -> Vec<u8> {
+    let mut data: Vec<u8> = Vec::new();
+    let mut writer =
+        ArrowWriter::try_new(&mut data, batch.schema(), Some(writer_properties)).unwrap();
     writer.write(batch).expect("Writing batch");
     // writer must be closed to write footer
     writer.close().unwrap();