From 6cc1735fa692f02e880af45081c6cdef1e2fe154 Mon Sep 17 00:00:00 2001 From: Fokko Date: Tue, 13 May 2025 18:21:28 +0200 Subject: [PATCH 1/3] Add `snapshot-loading-mode` to RESTCatalog This allows configuring which snapshots are sent back. In the case of `refs`, only the snapshots referenced by branches or tags will be returned: https://github.com/apache/iceberg/blob/5d2230ead79da64a8c871a02eb1304a94aaece5c/open-api/rest-catalog-open-api.yaml#L954-L956 --- pyiceberg/catalog/rest/__init__.py | 14 ++++++++-- tests/catalog/test_rest.py | 43 +++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 8ee9e5fdc9..633ef6498b 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -138,6 +138,7 @@ class IdentifierKind(Enum): SIGV4_REGION = "rest.signing-region" SIGV4_SERVICE = "rest.signing-name" OAUTH2_SERVER_URI = "oauth2-server-uri" +SNAPSHOT_LOADING_MODE = "snapshot-loading-mode" NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8) @@ -678,7 +679,16 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: @retry(**_RETRY_ARGS) def load_table(self, identifier: Union[str, Identifier]) -> Table: - response = self._session.get(self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier))) + params = {} + if mode := self.properties.get(SNAPSHOT_LOADING_MODE): + if mode in {"all", "refs"}: + params["snapshots"] = mode + else: + raise ValueError("Invalid snapshot-loading-mode: {}") + + response = self._session.get( + self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)), params=params + ) try: response.raise_for_status() except HTTPError as exc: @@ -816,7 +826,7 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi try: response.raise_for_status() except HTTPError as exc: - self._handle_non_200_response(exc, {}) + 
self._handle_non_200_response(exc, {404: NoSuchNamespaceError}) return ListNamespaceResponse.model_validate_json(response.text).namespaces diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index f2fc6ceb6b..dfc6a0e2fb 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -24,7 +24,7 @@ import pyiceberg from pyiceberg.catalog import PropertiesUpdateSummary, load_catalog -from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, RestCatalog +from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, RestCatalog from pyiceberg.exceptions import ( AuthorizationExpiredError, NamespaceAlreadyExistsError, @@ -555,6 +555,24 @@ def test_list_namespace_with_parent_200(rest_mock: Mocker) -> None: ] +def test_list_namespace_with_parent_404(rest_mock: Mocker) -> None: + rest_mock.get( + f"{TEST_URI}v1/namespaces?parent=some_namespace", + json={ + "error": { + "message": "Namespace provided in the `parent` query parameter is not found", + "type": "NoSuchNamespaceException", + "code": 404, + } + }, + status_code=404, + request_headers=TEST_HEADERS, + ) + + with pytest.raises(NoSuchNamespaceError): + RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("some_namespace",)) + + @pytest.mark.filterwarnings( "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. 
Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning" ) @@ -835,6 +853,29 @@ def test_load_table_200(rest_mock: Mocker, example_table_metadata_with_snapshot_ assert actual == expected +def test_load_table_200_loading_mode( + rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any] +) -> None: + rest_mock.get( + f"{TEST_URI}v1/namespaces/fokko/tables/table?snapshots=refs", + json=example_table_metadata_with_snapshot_v1_rest_json, + status_code=200, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{SNAPSHOT_LOADING_MODE: "refs"}) + actual = catalog.load_table(("fokko", "table")) + expected = Table( + identifier=("fokko", "table"), + metadata_location=example_table_metadata_with_snapshot_v1_rest_json["metadata-location"], + metadata=TableMetadataV1(**example_table_metadata_with_snapshot_v1_rest_json["metadata"]), + io=load_file_io(), + catalog=catalog, + ) + # First compare the dicts + assert actual.metadata.model_dump() == expected.metadata.model_dump() + assert actual == expected + + def test_load_table_honor_access_delegation( rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any] ) -> None: From e3c1c1b1497f9cd27be2d4e54d1e5aec1e69c198 Mon Sep 17 00:00:00 2001 From: Fokko Date: Tue, 13 May 2025 18:26:57 +0200 Subject: [PATCH 2/3] Add docs --- mkdocs/docs/configuration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 1e364a11fe..dc51f81434 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -332,6 +332,7 @@ catalog: | rest.signing-region | us-east-1 | The region to use when SigV4 signing a request | | rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request | | oauth2-server-uri | | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') | +| 
snapshot-loading-mode | refs | The snapshots to return in the body of the metadata. Setting the value to `all` would return the full set of snapshots currently valid for the table. Setting the value to `refs` would load all snapshots referenced by branches or tags. | From aa2f8d2eb7c4fc80d889e8840c880565d22c0e4a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 17:32:21 +0000 Subject: [PATCH 3/3] Build: Bump pytest from 7.4.4 to 8.3.5 Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.4.4 to 8.3.5. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.4.4...8.3.5) --- updated-dependencies: - dependency-name: pytest dependency-version: 8.3.5 dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- poetry.lock | 16 ++++++++-------- pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 849485ffbb..689b06b334 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4294,14 +4294,14 @@ sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] [[package]] name = "pytest" -version = "7.4.4" +version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, + {file = "pytest-8.3.5.tar.gz", hash = 
"sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, ] [package.dependencies] @@ -4309,11 +4309,11 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-checkdocs" @@ -5992,4 +5992,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.1" python-versions = "^3.9.2, !=3.9.7" -content-hash = "f3d2267ce4f380399dc767a6483a1f198127856883e394d8befbd8a871bbeab9" +content-hash = "d966d56024c8b3a47e24e7581c49caa5df61c8da2abd950ae0d169abf8f38893" diff --git a/pyproject.toml b/pyproject.toml index 3116214f71..6d18aaecea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ thrift-sasl = { version = ">=0.4.3", optional = true } kerberos = {version = "^1.3.1", optional = true} [tool.poetry.group.dev.dependencies] -pytest = "7.4.4" +pytest = "8.3.5" pytest-checkdocs = "2.13.0" pytest-lazy-fixture = "0.6.3" pre-commit = "4.2.0"