Skip to content

Commit f8c945a

Browse files
committed
allow passing arbitrary transaction_rid
1 parent d243994 commit f8c945a

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

libs/foundry-dev-tools/src/foundry_dev_tools/resources/dataset.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ def to_polars(self) -> pl.DataFrame:
800800
"""
801801
return self.query_foundry_sql("SELECT *", return_type="polars")
802802

803-
def to_lazy_polars(self) -> pl.LazyFrame:
803+
def to_lazy_polars(self, transaction_rid: str | None = None) -> pl.LazyFrame:
804804
"""Get dataset as a :py:class:`polars.LazyFrame`.
805805
806806
Returns a lazy polars DataFrame that can be queried efficiently using
@@ -824,14 +824,17 @@ def to_lazy_polars(self) -> pl.LazyFrame:
824824
"""
825825
from foundry_dev_tools._optional.polars import pl
826826

827-
last_transaction = self.get_last_transaction()
828-
if last_transaction is None:
829-
msg = f"Dataset has no transactions: {self.rid=}"
830-
raise DatasetHasNoTransactionsError(info=msg)
827+
if transaction_rid is None:
828+
maybe_transaction = self.get_last_transaction()
829+
if maybe_transaction is None:
830+
msg = f"Dataset has no transactions: {self.rid=}"
831+
raise DatasetHasNoTransactionsError(info=msg)
832+
transaction_rid = maybe_transaction["rid"]
831833

832834
return pl.scan_parquet(
833-
f"s3://{self.rid}.{last_transaction['rid']}/",
835+
f"s3://{self.rid}.{transaction_rid}/**/*.parquet",
834836
storage_options=self._context.s3.get_polars_storage_options(),
837+
hive_partitioning=True,
835838
)
836839

837840
@contextmanager

tests/unit/resources/test_dataset.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,28 @@ def test_to_lazy_polars_no_transaction():
1212
ds.rid = "ri.foundry.main.dataset.test-dataset"
1313
ds.path = "/test/dataset/path"
1414

15-
# Mock get_last_transaction to return None
1615
with mock.patch.object(ds, "get_last_transaction", return_value=None):
17-
# Assert that the correct exception is raised with the expected message
1816
with pytest.raises(DatasetHasNoTransactionsError) as exc_info:
1917
ds.to_lazy_polars()
2018

21-
# Verify the error message contains the expected information
2219
error_message = str(exc_info.value)
2320
assert "Dataset has no transactions" in error_message
2421
assert ds.rid in error_message
22+
23+
24+
def test_to_lazy_polars_transaction_rid_logic():
25+
with mock.patch.object(Dataset, "__created__", True):
26+
ds = Dataset.__new__(Dataset)
27+
ds.rid = "ri.foundry.main.dataset.abc123"
28+
ds._context = mock.MagicMock()
29+
ds._context.s3.get_polars_storage_options.return_value = {"aws_access_key_id": "test"}
30+
31+
with mock.patch("foundry_dev_tools._optional.polars.pl.scan_parquet") as mock_scan:
32+
mock_scan.return_value = mock.MagicMock()
33+
ds.to_lazy_polars(transaction_rid="test")
34+
35+
mock_scan.assert_called_once()
36+
call_args = mock_scan.call_args
37+
assert call_args[0][0] == f"s3://{ds.rid}.test/**/*.parquet"
38+
assert call_args[1]["storage_options"] == ds._context.s3.get_polars_storage_options()
39+
assert call_args[1]["hive_partitioning"] is True

0 commit comments

Comments
 (0)