|
33 | 33 | )
|
34 | 34 | from pyiceberg.expressions.literals import literal
|
35 | 35 | from pyiceberg.io.pyarrow import (
|
| 36 | + UnsupportedPyArrowTypeException, |
36 | 37 | _ConvertToArrowSchema,
|
37 | 38 | _ConvertToIceberg,
|
38 | 39 | _ConvertToIcebergWithoutIDs,
|
@@ -625,6 +626,91 @@ def test_pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids: pa
|
625 | 626 | assert _pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids) == expected_schema
|
626 | 627 |
|
627 | 628 |
|
def test_pyarrow_schema_unsupported_type() -> None:
    """Converting a PyArrow schema with an unsupported type raises
    UnsupportedPyArrowTypeException, with the offending field attached and
    the underlying TypeError chained as the cause."""

    def check_raises(schema: pa.Schema, field: pa.Field, message: str, cause_text: str) -> None:
        # Shared checker: run the conversion and verify the exception message,
        # the attached field, and the chained TypeError in one place.
        with pytest.raises(UnsupportedPyArrowTypeException, match=re.escape(message)) as exc_info:
            pyarrow_to_schema(schema)
        assert exc_info.value.field == field
        cause = exc_info.value.__cause__
        assert isinstance(cause, TypeError)
        assert cause_text in cause.args[0]

    # Case 1: decimal256 buried inside a struct within a large_list.
    bad_decimal_in_struct = pa.field("latitude", pa.decimal256(20, 26), nullable=False, metadata={"PARQUET:field_id": "2"})
    nested_schema = pa.schema(
        [
            pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1"}),
            pa.field(
                "location",
                pa.large_list(
                    pa.field(
                        "item",
                        pa.struct(
                            [
                                bad_decimal_in_struct,
                                pa.field("longitude", pa.float32(), nullable=False, metadata={"PARQUET:field_id": "3"}),
                            ]
                        ),
                        metadata={"PARQUET:field_id": "4"},
                    )
                ),
                nullable=False,
                metadata={"PARQUET:field_id": "5"},
            ),
        ],
        metadata={"PARQUET:field_id": "6"},
    )
    check_raises(
        nested_schema,
        bad_decimal_in_struct,
        "Column 'latitude' has an unsupported type: decimal256(20, 26)",
        "Unsupported type: decimal256(20, 26)",
    )

    # Case 2: decimal256 as the value type of a map nested inside another map.
    bad_map_field = pa.field(
        "quux",
        pa.map_(
            pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}),
            pa.field(
                "value",
                pa.map_(
                    pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "5"}),
                    pa.field("value", pa.decimal256(2, 3), metadata={"PARQUET:field_id": "6"}),
                ),
                nullable=False,
                metadata={"PARQUET:field_id": "4"},
            ),
        ),
        nullable=False,
        metadata={"PARQUET:field_id": "3"},
    )
    check_raises(
        pa.schema(
            [
                pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1"}),
                bad_map_field,
            ]
        ),
        bad_map_field,
        "Column 'quux' has an unsupported type: map<string, map<string, decimal256(2, 3)>>",
        "Unsupported type: decimal256(2, 3)",
    )

    # Case 3: nanosecond timestamps — the chained error says Iceberg does not
    # yet support 'ns' precision.
    bad_ns_timestamp = pa.field("foo", pa.timestamp(unit="ns"), nullable=False, metadata={"PARQUET:field_id": "1"})
    check_raises(
        pa.schema(
            [
                bad_ns_timestamp,
                pa.field("bar", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "2"}),
            ]
        ),
        bad_ns_timestamp,
        "Column 'foo' has an unsupported type: timestamp[ns]",
        "Iceberg does not yet support 'ns' timestamp precision",
    )
| 712 | + |
| 713 | + |
def test_pyarrow_schema_round_trip_ensure_large_types_and_then_small_types(pyarrow_schema_nested_without_ids: pa.Schema) -> None:
    """Upcasting a schema to large types and then downcasting again restores
    the original small-typed schema exactly."""
    upcast = _pyarrow_schema_ensure_large_types(pyarrow_schema_nested_without_ids)
    round_tripped = _pyarrow_schema_ensure_small_types(upcast)
    assert round_tripped == pyarrow_schema_nested_without_ids
|
|
0 commit comments