|
10 | 10 | import pytest |
11 | 11 |
|
12 | 12 | import awswrangler as wr |
| 13 | +from awswrangler._data_types import _split_fields |
13 | 14 |
|
14 | 15 | from ._utils import ensure_data_types, get_df, get_df_cast, get_df_list |
15 | 16 |
|
@@ -674,3 +675,35 @@ def test_cast_decimal(path, glue_table, glue_database): |
674 | 675 | assert df2["c1"].iloc[0] == Decimal((0, (1, 0, 0, 1), -1)) |
675 | 676 | assert df2["c2"].iloc[0] == Decimal((0, (1, 0, 0, 1), -1)) |
676 | 677 | assert df2["c3"].iloc[0] == "100.1" |
| 678 | + |
| 679 | + |
def test_splits():
    """Check that ``_split_fields`` breaks a schema string into its top-level fields.

    Each case pairs an input schema with the exact list of fields expected back;
    commas that sit inside a ``struct<...>`` must not produce a split.
    """
    cases = (
        # Two sibling structs.
        (
            "a:struct<id:string,name:string>,b:struct<id:string,name:string>",
            ["a:struct<id:string,name:string>", "b:struct<id:string,name:string>"],
        ),
        # Two siblings, each wrapping two nested structs.
        (
            "a:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>,b:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>",  # noqa
            [
                "a:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>",
                "b:struct<a:struct<id:string,name:string>,b:struct<id:string,name:string>>",
            ],
        ),
        # Four sibling structs.
        (
            "a:struct<id:string,name:string>,b:struct<id:string,name:string>,c:struct<id:string,name:string>,d:struct<id:string,name:string>",  # noqa
            [
                "a:struct<id:string,name:string>",
                "b:struct<id:string,name:string>",
                "c:struct<id:string,name:string>",
                "d:struct<id:string,name:string>",
            ],
        ),
    )
    for schema, expected_fields in cases:
        assert list(_split_fields(schema)) == expected_fields
| 695 | + |
| 696 | + |
def test_to_parquet_nested_structs(glue_database, glue_table, path):
    """Write a frame holding a list of nested structs twice and check the table shape via Athena.

    The same one-row frame is written to the dataset two times; after each write
    the table is read back and must expose two columns and one more row than before.
    """
    nested_record = {
        "a": {"id": "0", "name": "foo", "amount": 1},
        "b": {"id": "1", "name": "boo", "amount": 2},
    }
    frame = pd.DataFrame({"c0": [1], "c1": [[nested_record]]})
    query = f"SELECT * FROM {glue_table}"

    # First pass expects a single row; the second write is expected to add another.
    for expected_rows in (1, 2):
        wr.s3.to_parquet(df=frame, path=path, dataset=True, database=glue_database, table=glue_table)
        result = wr.athena.read_sql_query(sql=query, database=glue_database)
        assert result.shape == (expected_rows, 2)
0 commit comments