|
1 | 1 | //! Delta log store. |
| 2 | +use std::cmp::min; |
2 | 3 | use std::io::{BufRead, BufReader, Cursor}; |
3 | 4 | use std::sync::OnceLock; |
4 | 5 | use std::{cmp::max, collections::HashMap, sync::Arc}; |
5 | 6 |
|
6 | 7 | use bytes::Bytes; |
7 | 8 | use dashmap::DashMap; |
8 | | -use futures::StreamExt; |
| 9 | +use futures::{StreamExt, TryStreamExt}; |
9 | 10 | use lazy_static::lazy_static; |
10 | 11 | use object_store::{path::Path, Error as ObjectStoreError, ObjectStore}; |
11 | 12 | use regex::Regex; |
@@ -213,6 +214,9 @@ pub trait LogStore: Sync + Send { |
213 | 214 | /// Find latest version currently stored in the delta log. |
214 | 215 | async fn get_latest_version(&self, start_version: i64) -> DeltaResult<i64>; |
215 | 216 |
|
| 217 | + /// Find earliest version currently stored in the delta log. |
| 218 | + async fn get_earliest_version(&self, start_version: i64) -> DeltaResult<i64>; |
| 219 | + |
216 | 220 | /// Get underlying object store. |
217 | 221 | fn object_store(&self) -> Arc<dyn ObjectStore>; |
218 | 222 |
|
@@ -441,6 +445,52 @@ pub async fn get_latest_version( |
441 | 445 | Ok(version) |
442 | 446 | } |
443 | 447 |
|
| 448 | +/// Default implementation for retrieving the earliest version |
| 449 | +pub async fn get_earliest_version( |
| 450 | + log_store: &dyn LogStore, |
| 451 | + current_version: i64, |
| 452 | +) -> DeltaResult<i64> { |
| 453 | + let version_start = match get_last_checkpoint(log_store).await { |
| 454 | + Ok(last_check_point) => last_check_point.version, |
| 455 | + Err(ProtocolError::CheckpointNotFound) => { |
| 456 | + // no checkpoint so start from current_version |
| 457 | + current_version |
| 458 | + } |
| 459 | + Err(e) => { |
| 460 | + return Err(DeltaTableError::from(e)); |
| 461 | + } |
| 462 | + }; |
| 463 | + |
| 464 | + // list files to find min version |
| 465 | + let version = async { |
| 466 | + let mut min_version: i64 = version_start; |
| 467 | + let prefix = Some(log_store.log_path()); |
| 468 | + let offset_path = commit_uri_from_version(version_start); |
| 469 | + let object_store = log_store.object_store(); |
| 470 | + |
| 471 | + // Manually filter until we can provide direction in https://github.com/apache/arrow-rs/issues/6274 |
| 472 | + let mut files = object_store |
| 473 | + .list(prefix) |
| 474 | + .try_filter(move |f| futures::future::ready(f.location < offset_path)) |
| 475 | + .boxed(); |
| 476 | + |
| 477 | + while let Some(obj_meta) = files.next().await { |
| 478 | + let obj_meta = obj_meta?; |
| 479 | + if let Some(log_version) = extract_version_from_filename(obj_meta.location.as_ref()) { |
| 480 | + min_version = min(min_version, log_version); |
| 481 | + } |
| 482 | + } |
| 483 | + |
| 484 | + if min_version < 0 { |
| 485 | + return Err(DeltaTableError::not_a_table(log_store.root_uri())); |
| 486 | + } |
| 487 | + |
| 488 | + Ok::<i64, DeltaTableError>(min_version) |
| 489 | + } |
| 490 | + .await?; |
| 491 | + Ok(version) |
| 492 | +} |
| 493 | + |
444 | 494 | /// Read delta log for a specific version |
445 | 495 | pub async fn read_commit_entry( |
446 | 496 | storage: &dyn ObjectStore, |
|
0 commit comments