|
15 | 15 | # specific language governing permissions and limitations
|
16 | 16 | # under the License.
|
17 | 17 | # pylint:disable=redefined-outer-name
|
| 18 | +from typing import Optional |
18 | 19 |
|
19 | 20 | import pytest
|
20 | 21 |
|
@@ -63,12 +64,19 @@ def _table_v2(catalog: Catalog) -> Table:
|
63 | 64 | return _create_table_with_schema(catalog, schema_with_timestamp, "2")
|
64 | 65 |
|
65 | 66 |
|
def _create_table_with_schema(
    catalog: Catalog, schema: Schema, format_version: str, partition_spec: Optional[PartitionSpec] = None
) -> Table:
    """Drop and recreate the ``default.test_schema_evolution`` test table.

    Args:
        catalog: Catalog in which the table is dropped and then created.
        schema: Schema passed to ``catalog.create_table``.
        format_version: Value stored under the ``format-version`` table property.
        partition_spec: Optional spec; forwarded to ``create_table`` only when truthy,
            otherwise the argument is left out of the call entirely.

    Returns:
        Whatever ``catalog.create_table`` returns for the new table.
    """
    identifier = "default.test_schema_evolution"
    try:
        catalog.drop_table(identifier)
    except NoSuchTableError:
        # The table was not there to begin with; creation can proceed.
        pass

    # Build the optional keyword once so a single create_table call covers both cases.
    spec_kwargs = {"partition_spec": partition_spec} if partition_spec else {}
    return catalog.create_table(
        identifier=identifier,
        schema=schema,
        properties={"format-version": format_version},
        **spec_kwargs,
    )
|
73 | 81 |
|
74 | 82 |
|
@@ -582,10 +590,68 @@ def test_partition_schema_field_name_conflict(catalog: Catalog) -> None:
|
582 | 590 | with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: id"):
|
583 | 591 | table.update_spec().add_field("event_ts", DayTransform(), "id").commit()
|
584 | 592 |
|
585 |
| - with pytest.raises(ValueError, match="Cannot create identity partition from a different field in the schema: another_ts"): |
| 593 | + with pytest.raises( |
| 594 | + ValueError, match="Cannot create identity partition from a different source field in the schema: another_ts" |
| 595 | + ): |
586 | 596 | table.update_spec().add_field("event_ts", IdentityTransform(), "another_ts").commit()
|
587 |
| - with pytest.raises(ValueError, match="Cannot create identity partition from a different field in the schema: str"): |
| 597 | + with pytest.raises(ValueError, match="Cannot create identity partition from a different source field in the schema: str"): |
588 | 598 | table.update_spec().add_field("id", IdentityTransform(), "str").commit()
|
589 | 599 |
|
590 | 600 | table.update_spec().add_field("id", IdentityTransform(), "id").commit()
|
591 | 601 | table.update_spec().add_field("event_ts", YearTransform(), "event_year").commit()
|
| 602 | + |
| 603 | + |
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_partition_validation_during_table_creation(catalog: Catalog) -> None:
    """Partition field names are checked against the schema when a table is created."""
    table_schema = Schema(
        NestedField(1, "id", LongType(), required=False),
        NestedField(2, "event_ts", TimestampType(), required=False),
        NestedField(3, "another_ts", TimestampType(), required=False),
        NestedField(4, "str", StringType(), required=False),
    )

    # Year partition sourced from ``event_ts`` (id 2) but named like the ``another_ts`` column:
    # table creation is expected to fail.
    clashing_field = PartitionField(source_id=2, field_id=1000, transform=YearTransform(), name="another_ts")
    clashing_spec = PartitionSpec(clashing_field, spec_id=1)
    with pytest.raises(ValueError, match="Cannot create partition from name that exists in schema: another_ts"):
        _create_table_with_schema(catalog, table_schema, "2", clashing_spec)

    # Identity partition on ``id`` (id 1) that reuses its own source column name:
    # table creation is expected to succeed.
    identity_field = PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="id")
    identity_spec = PartitionSpec(identity_field, spec_id=1)
    _create_table_with_schema(catalog, table_schema, "2", identity_spec)
| 624 | + |
| 625 | + |
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_schema_evolution_partition_conflict(catalog: Catalog) -> None:
    """Schema changes that collide with existing partition field names are rejected."""
    # (column name to introduce, error message pattern expected from the commit)
    conflicts = (
        ("event_year", "Cannot create partition from name that exists in schema: event_year"),
        ("first_name", "Cannot create identity partition from a different source field in the schema: first_name"),
    )

    table_schema = Schema(
        NestedField(1, "id", LongType(), required=False),
        NestedField(2, "event_ts", TimestampType(), required=False),
    )
    spec = PartitionSpec(
        PartitionField(source_id=2, field_id=1000, transform=YearTransform(), name="event_year"),
        PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="first_name"),
        PartitionField(source_id=1, field_id=1002, transform=IdentityTransform(), name="id"),
        spec_id=1,
    )
    table = _create_table_with_schema(catalog, table_schema, "2", spec)

    # Adding a column under either conflicting partition name must fail.
    for column_name, expected_message in conflicts:
        with pytest.raises(ValueError, match=expected_message):
            table.update_schema().add_column(column_name, StringType()).commit()

    # A column whose name matches no partition field can be added.
    table.update_schema().add_column("other_field", StringType()).commit()

    # Renaming that column to either conflicting partition name must fail as well.
    for column_name, expected_message in conflicts:
        with pytest.raises(ValueError, match=expected_message):
            table.update_schema().rename_column("other_field", column_name).commit()

    # Renaming to a name that matches no partition field goes through.
    table.update_schema().rename_column("other_field", "valid_name").commit()
0 commit comments