From d8e6e6fef8a46da2052800d7b77812894bf12ac0 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 12:19:17 +0530 Subject: [PATCH 01/16] add snapshot_by_timestamp API --- pyiceberg/table/__init__.py | 4 ++++ pyiceberg/table/metadata.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index f160ab2441..bf7a2a409c 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1302,6 +1302,10 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: return self.snapshot_by_id(ref.snapshot_id) return None + def snapshot_by_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: + """Get the snapshot right before the given timestamp, or None if there is no matching snapshot.""" + return self.metadata.snapshot_by_timestamp(timestamp_ms) + def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" return self.metadata.snapshot_log diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 8c3c389318..9a67471ef8 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -230,6 +230,14 @@ def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]: """Get the snapshot by snapshot_id.""" return next((snapshot for snapshot in self.snapshots if snapshot.snapshot_id == snapshot_id), None) + def snapshot_by_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: + """Get the snapshot right before the given timestamp.""" + result, prev_timestamp = None, 0 + for snapshot in self.snapshots: + if prev_timestamp < snapshot.timestamp_ms < timestamp_ms: + result, prev_timestamp = snapshot, snapshot.timestamp_ms + return result if result else None + def schema_by_id(self, schema_id: int) -> Optional[Schema]: """Get the schema by schema_id.""" return next((schema for schema in self.schemas if schema.schema_id == schema_id), None) From 854d1342e4f32cd4085b4f2ddf8f64fdece093f2 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 12:20:18 +0530 Subject: [PATCH 02/16] add test --- tests/table/test_init.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 11d50db8a5..75577206ff 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -204,6 +204,18 @@ def test_snapshot_by_id(table_v2: Table) -> None: ) +def test_snapshot_by_timestamp(table_v2: Table) -> None: + assert table_v2.snapshot_by_timestamp(1555100955771) == Snapshot( + snapshot_id=3055729675574597004, + parent_snapshot_id=3051729675574597004, + sequence_number=1, + timestamp_ms=1555100955770, + manifest_list="s3://a/b/2.avro", + summary=Summary(operation=Operation.APPEND), + schema_id=1, + ) + + def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None: assert table_v2.snapshot_by_id(-1) is None From aa5fde59e9cca09f89a1d7c39362a7a5b06f7225 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 18:42:19 +0530 Subject: [PATCH 03/16] rename api to latest_snapshot_before_timestamp --- pyiceberg/table/__init__.py | 4 ++-- pyiceberg/table/metadata.py | 2 +- tests/table/test_init.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index bf7a2a409c..5dab8f5329 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1302,9 +1302,9 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: return self.snapshot_by_id(ref.snapshot_id) return None - def snapshot_by_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: + def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: """Get the snapshot right before the given timestamp, or None if there is no matching snapshot.""" - return self.metadata.snapshot_by_timestamp(timestamp_ms) + return self.metadata.latest_snapshot_before_timestamp(timestamp_ms) def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 9a67471ef8..6b02e71987 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -230,7 +230,7 @@ def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]: """Get the snapshot by snapshot_id.""" return next((snapshot for snapshot in self.snapshots if snapshot.snapshot_id == snapshot_id), None) - def snapshot_by_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: + def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: """Get the snapshot right before the given timestamp.""" result, prev_timestamp = None, 0 for snapshot in self.snapshots: diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 75577206ff..10994e04f2 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -205,7 +205,7 @@ def test_snapshot_by_id(table_v2: Table) -> None: def test_snapshot_by_timestamp(table_v2: Table) -> None: - assert table_v2.snapshot_by_timestamp(1555100955771) == Snapshot( + assert table_v2.latest_snapshot_before_timestamp(1555100955771) == Snapshot( snapshot_id=3055729675574597004, parent_snapshot_id=3051729675574597004, sequence_number=1, From c11d1b956296e72fe424587fca1f73ac4818b0cc Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 21:48:32 +0530 Subject: [PATCH 04/16] add ancestors_of API --- pyiceberg/table/metadata.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 6b02e71987..73b67d4ce7 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -238,6 +238,18 @@ def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapsh result, prev_timestamp = snapshot, snapshot.timestamp_ms return result if result else None + def ancestors_of(self, snapshot_id: int) -> List[tuple[int, int]]: + """Get the snapshot_id of the ancestors of the given snapshot.""" + current_id: Optional[int] = snapshot_id + result = [] + while current_id is not None: + snapshot = self.snapshot_by_id(current_id) + if not snapshot: + break + result.append((current_id, snapshot.timestamp_ms)) + current_id = snapshot.parent_snapshot_id + return result + def schema_by_id(self, schema_id: int) -> Optional[Schema]: """Get the schema by schema_id.""" return next((schema for schema in self.schemas if schema.schema_id == schema_id), None) From 7c3eb22b384011c52cd3c148ff8cfa0df2ccf472 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 21:51:40 +0530 Subject: [PATCH 05/16] iterate over ancestors_of() --- pyiceberg/table/metadata.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 73b67d4ce7..5519259cb8 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -233,10 +233,12 @@ def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]: def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: """Get the snapshot right before the given timestamp.""" result, prev_timestamp = None, 0 - for snapshot in self.snapshots: - if prev_timestamp < snapshot.timestamp_ms < timestamp_ms: - result, prev_timestamp = snapshot, snapshot.timestamp_ms - return result if result else None + if self.current_snapshot_id is not None: + for snapshot_id, snapshot_timestamp in self.ancestors_of(self.current_snapshot_id): + snapshot = self.snapshot_by_id(snapshot_id) + if prev_timestamp < snapshot_timestamp < timestamp_ms: + result, prev_timestamp = snapshot, snapshot_timestamp + return result def ancestors_of(self, snapshot_id: int) -> List[tuple[int, int]]: """Get the snapshot_id of the ancestors of the given snapshot.""" From cfbb2c6bc1b1b335e8deb84d24fcb2c37412b93a Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Thu, 16 May 2024 21:57:30 +0530 Subject: [PATCH 06/16] add test for ancestors_of() --- tests/table/test_init.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 10994e04f2..7068dc2cb4 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -216,6 +216,13 @@ def test_snapshot_by_timestamp(table_v2: Table) -> None: ) +def test_ancestors_of(table_v2: Table) -> None: + assert table_v2.metadata.ancestors_of(3055729675574597004) == [ + (3055729675574597004, 1555100955770), + (3051729675574597004, 1515100955770), + ] + + def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None: assert table_v2.snapshot_by_id(-1) is None From b29d2f17709952e9fb874377474a1a01d16284cd Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 17 May 2024 14:34:56 +0530 Subject: [PATCH 07/16] re-write ancestors_of() to return Iterable[Snapshot] --- pyiceberg/table/__init__.py | 12 +++++++++++- pyiceberg/table/metadata.py | 22 ---------------------- pyiceberg/table/snapshots.py | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 5dab8f5329..7cccdb4298 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -113,6 +113,7 @@ SnapshotLogEntry, SnapshotSummaryCollector, Summary, + ancestors_of, update_snapshot_summaries, ) from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder @@ -1304,7 +1305,16 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: """Get the snapshot right before the given timestamp, or None if there is no matching snapshot.""" - return self.metadata.latest_snapshot_before_timestamp(timestamp_ms) + result, prev_timestamp = None, 0 + if self.metadata.current_snapshot_id is not None: + for snapshot in self.current_ancestors(): + if snapshot and prev_timestamp < snapshot.timestamp_ms < timestamp_ms: + result, prev_timestamp = snapshot, snapshot.timestamp_ms + return result + + def current_ancestors(self) -> List[Optional[Snapshot]]: + """Get a list of ancestors of and including the current snapshot.""" + return list(ancestors_of(self.current_snapshot(), self.metadata)) # type: ignore def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 5519259cb8..8c3c389318 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -230,28 +230,6 @@ def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]: """Get the snapshot by snapshot_id.""" return next((snapshot for snapshot in self.snapshots if snapshot.snapshot_id == snapshot_id), None) - def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: - """Get the snapshot right before the given timestamp.""" - result, prev_timestamp = None, 0 - if self.current_snapshot_id is not None: - for snapshot_id, snapshot_timestamp in self.ancestors_of(self.current_snapshot_id): - snapshot = self.snapshot_by_id(snapshot_id) - if prev_timestamp < snapshot_timestamp < timestamp_ms: - result, prev_timestamp = snapshot, snapshot_timestamp - return result - - def ancestors_of(self, snapshot_id: int) -> List[tuple[int, int]]: - """Get the snapshot_id of the ancestors of the given snapshot.""" - current_id: Optional[int] = snapshot_id - result = [] - while current_id is not None: - snapshot = self.snapshot_by_id(current_id) - if not snapshot: - break - result.append((current_id, snapshot.timestamp_ms)) - current_id = snapshot.parent_snapshot_id - return result - def schema_by_id(self, schema_id: int) -> Optional[Schema]: """Get the schema by schema_id.""" return next((schema for schema in self.schemas if schema.schema_id == schema_id), None) diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index e2ce3fe4f1..735e249eb1 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import time from collections import defaultdict from enum import Enum -from typing import Any, DefaultDict, Dict, List, Mapping, Optional +from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional from pydantic import Field, PrivateAttr, model_serializer @@ -25,6 +27,9 @@ from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, read_manifest_list from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema + +if TYPE_CHECKING: + from pyiceberg.table.metadata import TableMetadata from pyiceberg.typedef import IcebergBaseModel ADDED_DATA_FILES = "added-data-files" @@ -412,3 +417,12 @@ def _update_totals(total_property: str, added_property: str, removed_property: s def set_when_positive(properties: Dict[str, str], num: int, property_name: str) -> None: if num > 0: properties[property_name] = str(num) + + +def ancestors_of(current_snapshot: Snapshot, table_metadata: TableMetadata) -> Iterable[Snapshot]: + """Get the ancestors of and including the given snapshot.""" + if current_snapshot: + yield current_snapshot + if current_snapshot.parent_snapshot_id: + if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): + yield from ancestors_of(parent, table_metadata) From 2155e0dc0aa3024db13b4fbe5e3981f29aaecb6c Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 17 May 2024 15:03:20 +0530 Subject: [PATCH 08/16] update test --- tests/table/test_init.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 7068dc2cb4..1948a4ad56 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -216,10 +216,26 @@ def test_snapshot_by_timestamp(table_v2: Table) -> None: ) -def test_ancestors_of(table_v2: Table) -> None: - assert table_v2.metadata.ancestors_of(3055729675574597004) == [ - (3055729675574597004, 1555100955770), - (3051729675574597004, 1515100955770), +def test_current_ancestors(table_v2: Table) -> None: + assert table_v2.current_ancestors() == [ + Snapshot( + snapshot_id=3055729675574597004, + parent_snapshot_id=3051729675574597004, + sequence_number=1, + timestamp_ms=1555100955770, + manifest_list='s3://a/b/2.avro', + summary=Summary(Operation.APPEND), + schema_id=1, + ), + Snapshot( + snapshot_id=3051729675574597004, + parent_snapshot_id=None, + sequence_number=0, + timestamp_ms=1515100955770, + manifest_list='s3://a/b/1.avro', + summary=Summary(Operation.APPEND), + schema_id=None, + ), ] From b7ffe258066b2b26fd32f619246824cc69decf9b Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 17 May 2024 19:47:24 +0530 Subject: [PATCH 09/16] fix case when parent_snapshot_id is 0 --- pyiceberg/table/snapshots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 735e249eb1..1fa50ced6b 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -423,6 +423,6 @@ def ancestors_of(current_snapshot: Snapshot, table_metadata: TableMetadata) -> I """Get the ancestors of and including the given snapshot.""" if current_snapshot: yield current_snapshot - if current_snapshot.parent_snapshot_id: + if current_snapshot.parent_snapshot_id is not None: if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): yield from ancestors_of(parent, table_metadata) From ad9c08ce8cce698a6e5d2b4c6c94c6d7dbdb526c Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Tue, 28 May 2024 13:00:43 +0530 Subject: [PATCH 10/16] updates --- pyiceberg/table/__init__.py | 18 ++++++++---------- pyiceberg/table/snapshots.py | 6 +++--- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 7cccdb4298..551ab191c1 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1303,18 +1303,16 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: return self.snapshot_by_id(ref.snapshot_id) return None - def latest_snapshot_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: - """Get the snapshot right before the given timestamp, or None if there is no matching snapshot.""" - result, prev_timestamp = None, 0 - if self.metadata.current_snapshot_id is not None: - for snapshot in self.current_ancestors(): - if snapshot and prev_timestamp < snapshot.timestamp_ms < timestamp_ms: - result, prev_timestamp = snapshot, snapshot.timestamp_ms - return result + def snapshot_at_or_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: + """Get the snapshot that was current at or right before the given timestamp, or None if there is no matching snapshot.""" + for log_entry in reversed(self.history()): + if log_entry.timestamp_ms <= timestamp_ms: + return self.snapshot_by_id(log_entry.snapshot_id) + return None - def current_ancestors(self) -> List[Optional[Snapshot]]: + def current_ancestors(self) -> Iterable[Snapshot]: """Get a list of ancestors of and including the current snapshot.""" - return list(ancestors_of(self.current_snapshot(), self.metadata)) # type: ignore + return ancestors_of(self.current_snapshot(), self.metadata) # type: ignore def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 1fa50ced6b..0b6a7d96d1 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -423,6 +423,6 @@ def ancestors_of(current_snapshot: Snapshot, table_metadata: TableMetadata) -> I """Get the ancestors of and including the given snapshot.""" if current_snapshot: yield current_snapshot - if current_snapshot.parent_snapshot_id is not None: - if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): - yield from ancestors_of(parent, table_metadata) + if current_snapshot.parent_snapshot_id is not None: + if parent := table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id): + yield from ancestors_of(parent, table_metadata) From ebe0b1d5c5cfa90d0c84a44b03ba2d58e64910a7 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Tue, 28 May 2024 13:54:23 +0530 Subject: [PATCH 11/16] update tests --- tests/table/test_init.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 1948a4ad56..8a0453d0f9 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -205,7 +205,7 @@ def test_snapshot_by_id(table_v2: Table) -> None: def test_snapshot_by_timestamp(table_v2: Table) -> None: - assert table_v2.latest_snapshot_before_timestamp(1555100955771) == Snapshot( + assert table_v2.snapshot_at_or_before_timestamp(1555100955771) == Snapshot( snapshot_id=3055729675574597004, parent_snapshot_id=3051729675574597004, sequence_number=1, @@ -217,7 +217,7 @@ def test_snapshot_by_timestamp(table_v2: Table) -> None: def test_current_ancestors(table_v2: Table) -> None: - assert table_v2.current_ancestors() == [ + assert list(table_v2.current_ancestors()) == [ Snapshot( snapshot_id=3055729675574597004, parent_snapshot_id=3051729675574597004, From 9affc45cb6e8ca539249ff4f06968800f3f6f4e0 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 31 May 2024 15:34:15 +0530 Subject: [PATCH 12/16] update inclusive behaviour and test --- pyiceberg/table/__init__.py | 11 ++++++++--- tests/table/test_init.py | 17 +++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 551ab191c1..6082a8b69d 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1303,10 +1303,15 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]: return self.snapshot_by_id(ref.snapshot_id) return None - def snapshot_at_or_before_timestamp(self, timestamp_ms: int) -> Optional[Snapshot]: - """Get the snapshot that was current at or right before the given timestamp, or None if there is no matching snapshot.""" + def snapshot_as_of_timestamp(self, timestamp_ms: int, inclusive: bool = True) -> Optional[Snapshot]: + """Get the snapshot that was current as of or right before the given timestamp, or None if there is no matching snapshot. + + Args: + timestamp_ms: Find snapshot that was current at/before this timestamp + inclusive: Includes timestamp_ms in search when True. Excludes timestamp_ms when False + """ for log_entry in reversed(self.history()): - if log_entry.timestamp_ms <= timestamp_ms: + if (inclusive and log_entry.timestamp_ms <= timestamp_ms) or log_entry.timestamp_ms < timestamp_ms: return self.snapshot_by_id(log_entry.snapshot_id) return None diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 8a0453d0f9..7bad28e745 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -205,15 +205,16 @@ def test_snapshot_by_id(table_v2: Table) -> None: def test_snapshot_by_timestamp(table_v2: Table) -> None: - assert table_v2.snapshot_at_or_before_timestamp(1555100955771) == Snapshot( - snapshot_id=3055729675574597004, - parent_snapshot_id=3051729675574597004, - sequence_number=1, - timestamp_ms=1555100955770, - manifest_list="s3://a/b/2.avro", - summary=Summary(operation=Operation.APPEND), - schema_id=1, + assert table_v2.snapshot_as_of_timestamp(1515100955770) == Snapshot( + snapshot_id=3051729675574597004, + parent_snapshot_id=None, + sequence_number=0, + timestamp_ms=1515100955770, + manifest_list='s3://a/b/1.avro', + summary=Summary(Operation.APPEND), + schema_id=None, ) + assert table_v2.snapshot_as_of_timestamp(1515100955770, inclusive=False) is None def test_current_ancestors(table_v2: Table) -> None: From a1ae5ca9dc7b5be8f5c5ee6e7194af2e9c95346b Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Fri, 31 May 2024 15:36:14 +0530 Subject: [PATCH 13/16] remove mypy ignore --- pyiceberg/table/__init__.py | 2 +- pyiceberg/table/snapshots.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 6082a8b69d..0e04ae6417 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1317,7 +1317,7 @@ def snapshot_as_of_timestamp(self, timestamp_ms: int, inclusive: bool = True) -> def current_ancestors(self) -> Iterable[Snapshot]: """Get a list of ancestors of and including the current snapshot.""" - return ancestors_of(self.current_snapshot(), self.metadata) # type: ignore + return ancestors_of(self.current_snapshot(), self.metadata) def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index 0b6a7d96d1..b21a0f5613 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -419,7 +419,7 @@ def set_when_positive(properties: Dict[str, str], num: int, property_name: str) properties[property_name] = str(num) -def ancestors_of(current_snapshot: Snapshot, table_metadata: TableMetadata) -> Iterable[Snapshot]: +def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMetadata) -> Iterable[Snapshot]: """Get the ancestors of and including the given snapshot.""" if current_snapshot: yield current_snapshot From 674a4196ac72ce5054d3b284369ccd98fe808cfc Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:43:25 +0530 Subject: [PATCH 14/16] remove current_ancestors(), update test --- pyiceberg/table/__init__.py | 4 ---- tests/table/test_init.py | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 0e04ae6417..15383bd039 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1315,10 +1315,6 @@ def snapshot_as_of_timestamp(self, timestamp_ms: int, inclusive: bool = True) -> return self.snapshot_by_id(log_entry.snapshot_id) return None - def current_ancestors(self) -> Iterable[Snapshot]: - """Get a list of ancestors of and including the current snapshot.""" - return ancestors_of(self.current_snapshot(), self.metadata) - def history(self) -> List[SnapshotLogEntry]: """Get the snapshot history of this table.""" return self.metadata.snapshot_log diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 7bad28e745..99e2688d86 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -75,7 +75,7 @@ Operation, Snapshot, SnapshotLogEntry, - Summary, + Summary, ancestors_of, ) from pyiceberg.table.sorting import ( NullOrder, @@ -217,8 +217,8 @@ def test_snapshot_by_timestamp(table_v2: Table) -> None: assert table_v2.snapshot_as_of_timestamp(1515100955770, inclusive=False) is None -def test_current_ancestors(table_v2: Table) -> None: - assert list(table_v2.current_ancestors()) == [ +def test_ancestors_of(table_v2: Table) -> None: + assert list(ancestors_of(table_v2.current_snapshot(), table_v2.metadata)) == [ Snapshot( snapshot_id=3055729675574597004, parent_snapshot_id=3051729675574597004, From 3ad62184e32050edb2f4e2248eac20b78907b166 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Mon, 3 Jun 2024 15:44:06 +0530 Subject: [PATCH 15/16] fix lint --- pyiceberg/table/__init__.py | 1 - tests/table/test_init.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 15383bd039..2d4b342461 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -113,7 +113,6 @@ SnapshotLogEntry, SnapshotSummaryCollector, Summary, - ancestors_of, update_snapshot_summaries, ) from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 99e2688d86..f64352d92a 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -75,7 +75,8 @@ Operation, Snapshot, SnapshotLogEntry, - Summary, ancestors_of, + Summary, + ancestors_of, ) from pyiceberg.table.sorting import ( NullOrder, From 287b38687f79cf939e49f3a46827ca49a7068ab3 Mon Sep 17 00:00:00 2001 From: chinmay-bhat <12948588+chinmay-bhat@users.noreply.github.com> Date: Mon, 3 Jun 2024 21:50:20 +0530 Subject: [PATCH 16/16] fix lint after rebasing onto main --- tests/table/test_init.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/table/test_init.py b/tests/table/test_init.py index f64352d92a..20b77b6abd 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -211,7 +211,7 @@ def test_snapshot_by_timestamp(table_v2: Table) -> None: parent_snapshot_id=None, sequence_number=0, timestamp_ms=1515100955770, - manifest_list='s3://a/b/1.avro', + manifest_list="s3://a/b/1.avro", summary=Summary(Operation.APPEND), schema_id=None, ) @@ -225,7 +225,7 @@ def test_ancestors_of(table_v2: Table) -> None: parent_snapshot_id=3051729675574597004, sequence_number=1, timestamp_ms=1555100955770, - manifest_list='s3://a/b/2.avro', + manifest_list="s3://a/b/2.avro", summary=Summary(Operation.APPEND), schema_id=1, ), @@ -234,7 +234,7 @@ def test_ancestors_of(table_v2: Table) -> None: parent_snapshot_id=None, sequence_number=0, timestamp_ms=1515100955770, - manifest_list='s3://a/b/1.avro', + manifest_list="s3://a/b/1.avro", summary=Summary(Operation.APPEND), schema_id=None, ),