Table scan: build update() copies from explicit constructor arguments.

update() used to re-create the scan with type(self)(**self.__dict__, ...).
This patch adds an `_arguments` property that returns exactly the eight
keyword arguments listed in the hunk below (table_metadata, io, row_filter,
selected_fields, case_sensitive, snapshot_id, options, limit) and makes
update() splat that dict instead. Subclasses whose constructors take extra
arguments are expected to override `_arguments` to include them.

The added integration test calls to_arrow() on a scan, then filter() on the
same scan, and asserts both results are non-empty.

NOTE(review): presumably filter() goes through update(), and executing a scan
leaves non-constructor entries in the instance __dict__, which the old code
forwarded to the constructor. Neither is visible in this patch; confirm
against the class before relying on this explanation.

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 1dc7a29cc1..837a6cd51d 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1691,7 +1691,21 @@ def to_polars(self) -> pl.DataFrame: ...
 
     def update(self: S, **overrides: Any) -> S:
         """Create a copy of this table scan with updated fields."""
-        return type(self)(**{**self.__dict__, **overrides})
+        return type(self)(**{**self._arguments, **overrides})
+
+    @property
+    def _arguments(self) -> dict[str, Any]:
+        """Return the arguments for TableScan creation. Subclasses with additional constructor arguments should override this to include them."""
+        return {
+            "table_metadata": self.table_metadata,
+            "io": self.io,
+            "row_filter": self.row_filter,
+            "selected_fields": self.selected_fields,
+            "case_sensitive": self.case_sensitive,
+            "snapshot_id": self.snapshot_id,
+            "options": self.options,
+            "limit": self.limit,
+        }
 
     def use_ref(self: S, name: str) -> S:
         if self.snapshot_id:
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index b417a43616..3aac579011 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -1024,3 +1024,16 @@ def test_scan_with_datetime(catalog: Catalog) -> None:
 
     df = table.scan(row_filter=LessThan("datetime", yesterday)).to_pandas()
     assert len(df) == 0
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_filter_after_arrow_scan(catalog: Catalog) -> None:
+    identifier = "test_partitioned_by_hours"
+    table = catalog.load_table(f"default.{identifier}")
+
+    scan = table.scan()
+    assert len(scan.to_arrow()) > 0
+
+    scan = scan.filter("ts >= '2023-03-05T00:00:00+00:00'")
+    assert len(scan.to_arrow()) > 0