|
20 | 20 |
|
21 | 21 | from pyiceberg.catalog import Catalog
|
22 | 22 | from pyiceberg.exceptions import NoSuchTableError
|
23 |
| -from pyiceberg.partitioning import PartitionField, PartitionSpec |
| 23 | +from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec |
24 | 24 | from pyiceberg.schema import Schema
|
25 | 25 | from pyiceberg.table import Table
|
26 | 26 | from pyiceberg.transforms import (
|
@@ -63,13 +63,18 @@ def _table_v2(catalog: Catalog) -> Table:
|
63 | 63 | return _create_table_with_schema(catalog, schema_with_timestamp, "2")
|
64 | 64 |
|
65 | 65 |
|
def _create_table_with_schema(
    catalog: Catalog, schema: Schema, format_version: str, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC
) -> Table:
    """Drop and recreate the shared schema-evolution test table.

    Args:
        catalog: Catalog in which the table is dropped and created.
        schema: Schema passed to ``catalog.create_table``.
        format_version: Value stored under the ``format-version`` table property.
        partition_spec: Partition spec passed to ``catalog.create_table``;
            defaults to ``UNPARTITIONED_PARTITION_SPEC``.

    Returns:
        Whatever ``catalog.create_table`` returns for the new table.
    """
    identifier = "default.test_schema_evolution"
    table_properties = {"format-version": format_version}

    # A missing table just means there is nothing to clean up from a prior run.
    try:
        catalog.drop_table(identifier)
    except NoSuchTableError:
        pass

    return catalog.create_table(
        identifier=identifier,
        schema=schema,
        partition_spec=partition_spec,
        properties=table_properties,
    )
73 | 78 |
|
74 | 79 |
|
75 | 80 | @pytest.mark.integration
|
@@ -564,3 +569,80 @@ def _validate_new_partition_fields(
|
564 | 569 | assert len(spec.fields) == len(expected_partition_fields)
|
565 | 570 | for i in range(len(spec.fields)):
|
566 | 571 | assert spec.fields[i] == expected_partition_fields[i]
|
| 572 | + |
| 573 | + |
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_partition_schema_field_name_conflict(catalog: Catalog) -> None:
    """Check which partition field names ``update_spec().add_field`` rejects or accepts.

    Each rejected case is expected to raise ``ValueError`` with the listed
    message; the two trailing commits are expected to succeed.
    """
    columns = (
        NestedField(1, "id", LongType(), required=False),
        NestedField(2, "event_ts", TimestampType(), required=False),
        NestedField(3, "another_ts", TimestampType(), required=False),
        NestedField(4, "str", StringType(), required=False),
    )
    table = _create_table_with_schema(catalog, Schema(*columns), "2")

    # (source column, transform, partition field name, expected error message)
    rejected_fields = (
        (
            "event_ts",
            YearTransform(),
            "another_ts",
            "Cannot create partition with a name that exists in schema: another_ts",
        ),
        (
            "event_ts",
            DayTransform(),
            "id",
            "Cannot create partition with a name that exists in schema: id",
        ),
        (
            "event_ts",
            IdentityTransform(),
            "another_ts",
            "Cannot create identity partition sourced from different field in schema: another_ts",
        ),
        (
            "id",
            IdentityTransform(),
            "str",
            "Cannot create identity partition sourced from different field in schema: str",
        ),
    )
    for source_column, transform, partition_name, expected_message in rejected_fields:
        with pytest.raises(ValueError, match=expected_message):
            table.update_spec().add_field(source_column, transform, partition_name).commit()

    # These two names are expected to be accepted, applied in this order.
    accepted_fields = (
        ("id", IdentityTransform(), "id"),
        ("event_ts", YearTransform(), "event_year"),
    )
    for source_column, transform, partition_name in accepted_fields:
        table.update_spec().add_field(source_column, transform, partition_name).commit()
| 597 | + |
| 598 | + |
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_partition_validation_during_table_creation(catalog: Catalog) -> None:
    """Check partition-name validation when a spec is supplied at table creation.

    The first spec is expected to make table creation raise ``ValueError``;
    the second is expected to be accepted.
    """
    schema = Schema(
        NestedField(1, "id", LongType(), required=False),
        NestedField(2, "event_ts", TimestampType(), required=False),
        NestedField(3, "another_ts", TimestampType(), required=False),
        NestedField(4, "str", StringType(), required=False),
    )

    # Year transform on field 2, named after the schema column "another_ts".
    clashing_field = PartitionField(source_id=2, field_id=1000, transform=YearTransform(), name="another_ts")
    clashing_spec = PartitionSpec(clashing_field, spec_id=1)
    with pytest.raises(ValueError, match="Cannot create partition with a name that exists in schema: another_ts"):
        _create_table_with_schema(catalog, schema, "2", clashing_spec)

    # Identity transform on field 1, reusing that field's own name "id".
    identity_field = PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="id")
    identity_spec = PartitionSpec(identity_field, spec_id=1)
    _create_table_with_schema(catalog, schema, "2", identity_spec)
| 619 | + |
| 620 | + |
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
def test_schema_evolution_partition_conflict(catalog: Catalog) -> None:
    """Check that schema changes are validated against existing partition field names.

    Adding or renaming a column to ``event_year`` or ``first_name`` is expected
    to raise ``ValueError``; the other add and rename are expected to succeed.
    """
    schema = Schema(
        NestedField(1, "id", LongType(), required=False),
        NestedField(2, "event_ts", TimestampType(), required=False),
    )
    partition_fields = (
        PartitionField(source_id=2, field_id=1000, transform=YearTransform(), name="event_year"),
        PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="first_name"),
        PartitionField(source_id=1, field_id=1002, transform=IdentityTransform(), name="id"),
    )
    table = _create_table_with_schema(catalog, schema, "2", PartitionSpec(*partition_fields, spec_id=1))

    # (column name, expected error message); reused for both the add and the rename checks.
    conflicting_names = (
        ("event_year", "Cannot create partition with a name that exists in schema: event_year"),
        ("first_name", "Cannot create identity partition sourced from different field in schema: first_name"),
    )

    for column_name, expected_message in conflicting_names:
        with pytest.raises(ValueError, match=expected_message):
            table.update_schema().add_column(column_name, StringType()).commit()

    table.update_schema().add_column("other_field", StringType()).commit()

    for column_name, expected_message in conflicting_names:
        with pytest.raises(ValueError, match=expected_message):
            table.update_schema().rename_column("other_field", column_name).commit()

    table.update_schema().rename_column("other_field", "valid_name").commit()
0 commit comments