diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 333e813ac3..6c6da2a9b7 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1702,7 +1702,14 @@ def to_polars(self) -> pl.DataFrame: ...
 
     def update(self: S, **overrides: Any) -> S:
         """Create a copy of this table scan with updated fields."""
-        return type(self)(**{**self.__dict__, **overrides})
+        from inspect import signature
+
+        # Extract those attributes that are constructor parameters. We don't use self.__dict__ as the kwargs to the
+        # constructors because it may contain additional attributes that are not part of the constructor signature.
+        params = signature(type(self).__init__).parameters.keys() - {"self"}  # Skip "self" parameter
+        kwargs = {param: getattr(self, param) for param in params}  # Assume parameters are attributes
+
+        return type(self)(**{**kwargs, **overrides})
 
     def use_ref(self: S, name: str) -> S:
         if self.snapshot_id:
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index 1b04b63733..96844fd995 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -1057,3 +1057,16 @@ def test_initial_default(catalog: Catalog, spark: SparkSession) -> None:
     result_table = tbl.scan().filter("so_true == True").to_arrow()
 
     assert len(result_table) == 10
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
+def test_filter_after_arrow_scan(catalog: Catalog) -> None:
+    identifier = "test_partitioned_by_hours"
+    table = catalog.load_table(f"default.{identifier}")
+
+    scan = table.scan()
+    assert len(scan.to_arrow()) > 0
+
+    scan = scan.filter("ts >= '2023-03-05T00:00:00+00:00'")
+    assert len(scan.to_arrow()) > 0