Skip to content

Commit 18cc561

Browse files
ion-elgreco authored and rtyler committed
feat: get earliest version
1 parent 72e344e commit 18cc561

File tree

16 files changed

+100
-27
lines changed

16 files changed

+100
-27
lines changed

crates/aws/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deltalake-aws"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
authors.workspace = true
55
keywords.workspace = true
66
readme.workspace = true
@@ -12,7 +12,7 @@ repository.workspace = true
1212
rust-version.workspace = true
1313

1414
[dependencies]
15-
deltalake-core = { version = "0.20.0", path = "../core" }
15+
deltalake-core = { version = "0.21.0", path = "../core" }
1616
aws-smithy-runtime-api = { version="1.7" }
1717
aws-smithy-runtime = { version="1.7", optional = true}
1818
aws-credential-types = { version="1.2", features = ["hardcoded-credentials"]}

crates/aws/src/logstore/default_logstore.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,8 @@
33
use std::sync::Arc;
44

55
use bytes::Bytes;
6+
use deltalake_core::logstore::*;
67
use deltalake_core::{
7-
logstore::{
8-
abort_commit_entry, get_latest_version, read_commit_entry, write_commit_entry,
9-
CommitOrBytes, LogStore, LogStoreConfig,
10-
},
118
operations::transaction::TransactionError,
129
storage::{ObjectStoreRef, StorageOptions},
1310
DeltaResult,
@@ -103,6 +100,10 @@ impl LogStore for S3LogStore {
103100
get_latest_version(self, current_version).await
104101
}
105102

103+
async fn get_earliest_version(&self, current_version: i64) -> DeltaResult<i64> {
104+
get_earliest_version(self, current_version).await
105+
}
106+
106107
fn object_store(&self) -> Arc<dyn ObjectStore> {
107108
self.storage.clone()
108109
}

crates/aws/src/logstore/dynamodb_logstore.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,10 @@ impl LogStore for S3DynamoDbLogStore {
296296
}
297297
}
298298

299+
async fn get_earliest_version(&self, current_version: i64) -> DeltaResult<i64> {
300+
get_earliest_version(self, current_version).await
301+
}
302+
299303
fn object_store(&self) -> ObjectStoreRef {
300304
self.storage.clone()
301305
}

crates/azure/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deltalake-azure"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
authors.workspace = true
55
keywords.workspace = true
66
readme.workspace = true
@@ -12,7 +12,7 @@ repository.workspace = true
1212
rust-version.workspace = true
1313

1414
[dependencies]
15-
deltalake-core = { version = "0.20.0", path = "../core" }
15+
deltalake-core = { version = "0.21.0", path = "../core" }
1616
lazy_static = "1"
1717

1818
# workspace dependencies

crates/catalog-glue/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deltalake-catalog-glue"
3-
version = "0.4.0"
3+
version = "0.5.0"
44
authors.workspace = true
55
keywords.workspace = true
66
readme.workspace = true
@@ -15,7 +15,7 @@ rust-version.workspace = true
1515
async-trait = { workspace = true }
1616
aws-config = "1"
1717
aws-sdk-glue = "1"
18-
deltalake-core = { version = "0.20.0", path = "../core" }
18+
deltalake-core = { version = "0.21.0", path = "../core" }
1919
thiserror = { workspace = true }
2020

2121
[dev-dependencies]

crates/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deltalake-core"
3-
version = "0.20.1"
3+
version = "0.21.0"
44
authors.workspace = true
55
keywords.workspace = true
66
readme.workspace = true

crates/core/src/logstore/default_logstore.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,10 @@ impl LogStore for DefaultLogStore {
9797
super::get_latest_version(self, current_version).await
9898
}
9999

100+
async fn get_earliest_version(&self, current_version: i64) -> DeltaResult<i64> {
101+
super::get_earliest_version(self, current_version).await
102+
}
103+
100104
fn object_store(&self) -> Arc<dyn ObjectStore> {
101105
self.storage.clone()
102106
}

crates/core/src/logstore/mod.rs

Lines changed: 51 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
//! Delta log store.
2+
use std::cmp::min;
23
use std::io::{BufRead, BufReader, Cursor};
34
use std::sync::OnceLock;
45
use std::{cmp::max, collections::HashMap, sync::Arc};
56

67
use bytes::Bytes;
78
use dashmap::DashMap;
8-
use futures::StreamExt;
9+
use futures::{StreamExt, TryStreamExt};
910
use lazy_static::lazy_static;
1011
use object_store::{path::Path, Error as ObjectStoreError, ObjectStore};
1112
use regex::Regex;
@@ -213,6 +214,9 @@ pub trait LogStore: Sync + Send {
213214
/// Find latest version currently stored in the delta log.
214215
async fn get_latest_version(&self, start_version: i64) -> DeltaResult<i64>;
215216

217+
/// Find earliest version currently stored in the delta log.
218+
async fn get_earliest_version(&self, start_version: i64) -> DeltaResult<i64>;
219+
216220
/// Get underlying object store.
217221
fn object_store(&self) -> Arc<dyn ObjectStore>;
218222

@@ -441,6 +445,52 @@ pub async fn get_latest_version(
441445
Ok(version)
442446
}
443447

448+
/// Default implementation for retrieving the earliest version
449+
pub async fn get_earliest_version(
450+
log_store: &dyn LogStore,
451+
current_version: i64,
452+
) -> DeltaResult<i64> {
453+
let version_start = match get_last_checkpoint(log_store).await {
454+
Ok(last_check_point) => last_check_point.version,
455+
Err(ProtocolError::CheckpointNotFound) => {
456+
// no checkpoint so start from current_version
457+
current_version
458+
}
459+
Err(e) => {
460+
return Err(DeltaTableError::from(e));
461+
}
462+
};
463+
464+
// list files to find min version
465+
let version = async {
466+
let mut min_version: i64 = version_start;
467+
let prefix = Some(log_store.log_path());
468+
let offset_path = commit_uri_from_version(version_start);
469+
let object_store = log_store.object_store();
470+
471+
// Manually filter until we can provide direction in https://github.com/apache/arrow-rs/issues/6274
472+
let mut files = object_store
473+
.list(prefix)
474+
.try_filter(move |f| futures::future::ready(f.location < offset_path))
475+
.boxed();
476+
477+
while let Some(obj_meta) = files.next().await {
478+
let obj_meta = obj_meta?;
479+
if let Some(log_version) = extract_version_from_filename(obj_meta.location.as_ref()) {
480+
min_version = min(min_version, log_version);
481+
}
482+
}
483+
484+
if min_version < 0 {
485+
return Err(DeltaTableError::not_a_table(log_store.root_uri()));
486+
}
487+
488+
Ok::<i64, DeltaTableError>(min_version)
489+
}
490+
.await?;
491+
Ok(version)
492+
}
493+
444494
/// Read delta log for a specific version
445495
pub async fn read_commit_entry(
446496
storage: &dyn ObjectStore,

crates/core/src/table/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -311,6 +311,11 @@ impl DeltaTable {
311311
self.log_store.get_latest_version(self.version()).await
312312
}
313313

314+
/// returns the earliest available version of the table
315+
pub async fn get_earliest_version(&self) -> Result<i64, DeltaTableError> {
316+
self.log_store.get_earliest_version(self.version()).await
317+
}
318+
314319
/// Currently loaded version of the table
315320
pub fn version(&self) -> i64 {
316321
self.state.as_ref().map(|s| s.version()).unwrap_or(-1)

crates/deltalake/Cargo.toml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "deltalake"
3-
version = "0.20.1"
3+
version = "0.21.0"
44
authors.workspace = true
55
keywords.workspace = true
66
readme.workspace = true
@@ -16,12 +16,12 @@ rust-version.workspace = true
1616
features = ["azure", "datafusion", "gcs", "hdfs", "json", "python", "s3", "unity-experimental"]
1717

1818
[dependencies]
19-
deltalake-core = { version = "0.20.0", path = "../core" }
20-
deltalake-aws = { version = "0.3.0", path = "../aws", default-features = false, optional = true }
21-
deltalake-azure = { version = "0.3.0", path = "../azure", optional = true }
22-
deltalake-gcp = { version = "0.4.0", path = "../gcp", optional = true }
23-
deltalake-hdfs = { version = "0.4.0", path = "../hdfs", optional = true }
24-
deltalake-catalog-glue = { version = "0.4.0", path = "../catalog-glue", optional = true }
19+
deltalake-core = { version = "0.21.0", path = "../core" }
20+
deltalake-aws = { version = "0.4.0", path = "../aws", default-features = false, optional = true }
21+
deltalake-azure = { version = "0.4.0", path = "../azure", optional = true }
22+
deltalake-gcp = { version = "0.5.0", path = "../gcp", optional = true }
23+
deltalake-hdfs = { version = "0.5.0", path = "../hdfs", optional = true }
24+
deltalake-catalog-glue = { version = "0.5.0", path = "../catalog-glue", optional = true }
2525

2626
[features]
2727
# All of these features are just reflected into the core crate until that

0 commit comments

Comments
 (0)