|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | +# pylint:disable=redefined-outer-name |
| 18 | + |
| 19 | +import pytest |
| 20 | + |
| 21 | +from pyiceberg.catalog import Catalog |
| 22 | +from pyiceberg.exceptions import NoSuchTableError |
| 23 | +from pyiceberg.schema import Schema |
| 24 | +from pyiceberg.table import Table |
| 25 | +from pyiceberg.table.sorting import NullOrder, SortDirection, SortField, SortOrder |
| 26 | +from pyiceberg.transforms import ( |
| 27 | + IdentityTransform, |
| 28 | +) |
| 29 | + |
| 30 | + |
def _simple_table(catalog: Catalog, table_schema_simple: Schema, format_version: str) -> Table:
    """Create the test table from the simple schema.

    Thin convenience wrapper that forwards all three arguments, unchanged,
    to ``_create_table_with_schema`` and returns the table it creates.
    """
    table = _create_table_with_schema(catalog, table_schema_simple, format_version)
    return table
| 33 | + |
| 34 | + |
def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: str) -> Table:
    """Drop the shared test table if it exists, then create it anew.

    Args:
        catalog: Catalog in which the table is dropped and re-created.
        schema: Schema passed to ``catalog.create_table``.
        format_version: Value stored under the ``format-version`` table property.

    Returns:
        The table returned by ``catalog.create_table``.
    """
    # NOTE(review): the identifier mentions "schema_evolution" although this
    # module tests sort orders -- presumably copied from another test module;
    # confirm it cannot collide with tests that use the same name.
    identifier = "default.test_schema_evolution"

    try:
        catalog.drop_table(identifier)
    except NoSuchTableError:
        # Nothing to clean up: the table was not there to begin with.
        pass

    table_properties = {"format-version": format_version}
    return catalog.create_table(identifier=identifier, schema=schema, properties=table_properties)
| 42 | + |
| 43 | + |
@pytest.mark.integration
@pytest.mark.parametrize(
    "catalog, format_version",
    [
        (pytest.lazy_fixture("session_catalog"), "1"),
        (pytest.lazy_fixture("session_catalog_hive"), "1"),
        (pytest.lazy_fixture("session_catalog"), "2"),
        (pytest.lazy_fixture("session_catalog_hive"), "2"),
    ],
)
def test_map_column_name_to_id(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None:
    """Check that the sort-order updater resolves column names to the expected field ids."""
    simple_table = _simple_table(catalog, table_schema_simple, format_version)

    expected_ids = {"foo": 1, "bar": 2, "baz": 3}
    for name, expected_id in expected_ids.items():
        # A fresh update builder is requested for every lookup.
        resolved_id = simple_table.update_sort_order()._column_name_to_id(name)
        assert expected_id == resolved_id
| 58 | + |
| 59 | + |
@pytest.mark.integration
@pytest.mark.parametrize(
    "catalog, format_version",
    [
        (pytest.lazy_fixture("session_catalog"), "1"),
        (pytest.lazy_fixture("session_catalog_hive"), "1"),
        (pytest.lazy_fixture("session_catalog"), "2"),
        (pytest.lazy_fixture("session_catalog_hive"), "2"),
    ],
)
def test_update_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None:
    """Commit an ascending + descending sort order and check it becomes the table's sort order."""
    simple_table = _simple_table(catalog, table_schema_simple, format_version)

    # Build the update step by step: "foo" ascending, then "bar" descending.
    update = simple_table.update_sort_order()
    update = update.asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST)
    update = update.desc("bar", IdentityTransform(), NullOrder.NULLS_LAST)
    update.commit()

    expected_order = SortOrder(
        SortField(
            source_id=1,
            transform=IdentityTransform(),
            direction=SortDirection.ASC,
            null_order=NullOrder.NULLS_FIRST,
        ),
        SortField(
            source_id=2,
            transform=IdentityTransform(),
            direction=SortDirection.DESC,
            null_order=NullOrder.NULLS_LAST,
        ),
        order_id=1,
    )
    assert simple_table.sort_order() == expected_order
| 80 | + |
| 81 | + |
@pytest.mark.integration
@pytest.mark.parametrize(
    "catalog, format_version",
    [
        (pytest.lazy_fixture("session_catalog"), "1"),
        (pytest.lazy_fixture("session_catalog_hive"), "1"),
        (pytest.lazy_fixture("session_catalog"), "2"),
        (pytest.lazy_fixture("session_catalog_hive"), "2"),
    ],
)
def test_increment_existing_sort_order_id(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None:
    """Commit two different sort orders in a row and check the second one gets order id 2."""
    simple_table = _simple_table(catalog, table_schema_simple, format_version)

    # First commit: sort on "foo" only.
    first_update = simple_table.update_sort_order()
    first_update = first_update.asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST)
    first_update.commit()

    first_expected = SortOrder(
        SortField(
            source_id=1,
            transform=IdentityTransform(),
            direction=SortDirection.ASC,
            null_order=NullOrder.NULLS_FIRST,
        ),
        order_id=1,
    )
    assert simple_table.sort_order() == first_expected

    # Second commit: a different order on "foo" and "bar".
    second_update = simple_table.update_sort_order()
    second_update = second_update.asc("foo", IdentityTransform(), NullOrder.NULLS_LAST)
    second_update = second_update.desc("bar", IdentityTransform(), NullOrder.NULLS_FIRST)
    second_update.commit()

    # Expected entries -- 0: the empty sort order from table creation,
    # 1: the first committed order, 2: the second committed order.
    known_orders = simple_table.sort_orders()
    assert len(known_orders) == 3

    second_expected = SortOrder(
        SortField(
            source_id=1,
            transform=IdentityTransform(),
            direction=SortDirection.ASC,
            null_order=NullOrder.NULLS_LAST,
        ),
        SortField(
            source_id=2,
            transform=IdentityTransform(),
            direction=SortDirection.DESC,
            null_order=NullOrder.NULLS_FIRST,
        ),
        order_id=2,
    )
    assert simple_table.sort_order() == second_expected
| 110 | + |
| 111 | + |
@pytest.mark.integration
@pytest.mark.parametrize(
    "catalog, format_version",
    [
        (pytest.lazy_fixture("session_catalog"), "1"),
        (pytest.lazy_fixture("session_catalog_hive"), "1"),
        (pytest.lazy_fixture("session_catalog"), "2"),
        (pytest.lazy_fixture("session_catalog_hive"), "2"),
    ],
)
def test_update_existing_sort_order(catalog: Catalog, format_version: str, table_schema_simple: Schema) -> None:
    """Re-commit a previously used sort order and check it is reused rather than duplicated."""
    simple_table = _simple_table(catalog, table_schema_simple, format_version)

    # The order on "foo" that is committed first and re-committed at the end.
    foo_only_order = SortOrder(
        SortField(
            source_id=1,
            transform=IdentityTransform(),
            direction=SortDirection.ASC,
            null_order=NullOrder.NULLS_FIRST,
        ),
        order_id=1,
    )

    first_update = simple_table.update_sort_order()
    first_update = first_update.asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST)
    first_update.commit()
    assert simple_table.sort_order() == foo_only_order

    # Switch to a different order on "foo" and "bar".
    second_update = simple_table.update_sort_order()
    second_update = second_update.asc("foo", IdentityTransform(), NullOrder.NULLS_LAST)
    second_update = second_update.desc("bar", IdentityTransform(), NullOrder.NULLS_FIRST)
    second_update.commit()

    # Go back to the first sort order.
    revert_update = simple_table.update_sort_order()
    revert_update = revert_update.asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST)
    revert_update.commit()

    # The revert commit above should not create a new sort order, since it is
    # the same as the first one; the count is expected to stay at three.
    known_orders = simple_table.sort_orders()
    assert len(known_orders) == 3
    assert simple_table.sort_order() == foo_only_order
| 141 | + |
| 142 | + |
@pytest.mark.integration
@pytest.mark.parametrize(
    "catalog, format_version",
    [
        (pytest.lazy_fixture("session_catalog"), "1"),
        (pytest.lazy_fixture("session_catalog_hive"), "1"),
        (pytest.lazy_fixture("session_catalog"), "2"),
        (pytest.lazy_fixture("session_catalog_hive"), "2"),
    ],
)
def test_update_existing_sort_order_with_unsorted_sort_order(
    catalog: Catalog, format_version: str, table_schema_simple: Schema
) -> None:
    """Commit an empty sort-order update and check the table falls back to order id 0."""
    simple_table = _simple_table(catalog, table_schema_simple, format_version)

    # Start from a table sorted on "foo".
    sorted_update = simple_table.update_sort_order()
    sorted_update = sorted_update.asc("foo", IdentityTransform(), NullOrder.NULLS_FIRST)
    sorted_update.commit()

    foo_only_order = SortOrder(
        SortField(
            source_id=1,
            transform=IdentityTransform(),
            direction=SortDirection.ASC,
            null_order=NullOrder.NULLS_FIRST,
        ),
        order_id=1,
    )
    assert simple_table.sort_order() == foo_only_order

    # Committing an update without any fields: the table should now be unsorted.
    simple_table.update_sort_order().commit()

    # Only two orders are expected (the initial one with id 0 and the one on
    # "foo"): the empty commit should reuse order id 0 instead of adding a new one.
    known_orders = simple_table.sort_orders()
    assert len(known_orders) == 2

    unsorted_order = SortOrder(order_id=0)
    assert simple_table.sort_order() == unsorted_order
0 commit comments