11from __future__ import annotations
22
3- from typing import Any
3+ from typing import Any , Iterator
44
5+ import boto3
56import pytest
67
78import awswrangler as wr
89import awswrangler .pandas as pd
910
11+ from .._utils import (
12+ get_time_str_with_random_suffix ,
13+ )
14+
15+
@pytest.fixture(scope="session")
def lock_dynamodb_table() -> Iterator[str]:
    """Create a uniquely named DynamoDB table for the session and yield its name.

    The table uses ``PAY_PER_REQUEST`` billing with a composite key:
    ``tablePath`` as the hash key and ``fileName`` as the range key, both
    declared as string attributes. After the yield, the table is deleted and
    the fixture waits until the deletion has completed.
    """
    table_name = f"deltalake_lock_{get_time_str_with_random_suffix()}"
    print(f"Table name: {table_name}")

    # Key attribute name -> key type; every key attribute is a string ("S").
    key_columns = {"tablePath": "HASH", "fileName": "RANGE"}

    client = boto3.client("dynamodb")
    client.create_table(
        TableName=table_name,
        BillingMode="PAY_PER_REQUEST",
        KeySchema=[
            {"AttributeName": column, "KeyType": key_type}
            for column, key_type in key_columns.items()
        ],
        AttributeDefinitions=[
            {"AttributeName": column, "AttributeType": "S"}
            for column in key_columns
        ],
    )

    # Block until the table exists before handing its name to the tests.
    created_waiter = client.get_waiter("table_exists")
    created_waiter.wait(TableName=table_name)

    yield table_name

    # Teardown: drop the table and wait for it to be gone.
    client.delete_table(TableName=table_name)
    deleted_waiter = client.get_waiter("table_not_exists")
    deleted_waiter.wait(TableName=table_name)
    print(f"Table {table_name} deleted.")
42+
43+
@pytest.fixture(params=["no_lock", "dynamodb_lock"], scope="session")
def lock_settings(request: pytest.FixtureRequest) -> dict[str, Any]:
    """Return the keyword arguments that select the locking mode for this parameter.

    For ``"no_lock"`` the result is ``{"s3_allow_unsafe_rename": True}``.
    For any other parameter the result is ``{"lock_dynamodb_table": <name>}``,
    where ``<name>`` is the value of the ``lock_dynamodb_table`` fixture.
    """
    if request.param != "no_lock":
        # Resolved on demand, so the table fixture is only requested by the
        # parametrization that actually uses a lock table.
        table_name = request.getfixturevalue("lock_dynamodb_table")
        return {"lock_dynamodb_table": table_name}
    return {"s3_allow_unsafe_rename": True}
50+
1051
1152@pytest .mark .parametrize ("s3_additional_kwargs" , [None , {"ServerSideEncryption" : "AES256" }])
1253@pytest .mark .parametrize (
1354 "pyarrow_additional_kwargs" , [{"safe" : True , "deduplicate_objects" : False , "types_mapper" : None }]
1455)
1556def test_read_deltalake (
16- path : str , s3_additional_kwargs : dict [str , Any ] | None , pyarrow_additional_kwargs : dict [str , Any ]
57+ path : str ,
58+ lock_settings : dict [str , Any ],
59+ s3_additional_kwargs : dict [str , Any ] | None ,
60+ pyarrow_additional_kwargs : dict [str , Any ],
1761) -> None :
1862 df = pd .DataFrame ({"c0" : [1 , 2 , 3 ], "c1" : ["foo" , None , "bar" ], "c2" : [3.0 , 4.0 , 5.0 ], "c3" : [True , False , None ]})
19- wr .s3 .to_deltalake (path = path , df = df , s3_additional_kwargs = s3_additional_kwargs , s3_allow_unsafe_rename = True )
63+ wr .s3 .to_deltalake (path = path , df = df , s3_additional_kwargs = s3_additional_kwargs , ** lock_settings )
2064
2165 df2 = wr .s3 .read_deltalake (
2266 path = path , s3_additional_kwargs = s3_additional_kwargs , pyarrow_additional_kwargs = pyarrow_additional_kwargs
@@ -25,15 +69,17 @@ def test_read_deltalake(
2569
2670
2771@pytest .mark .parametrize ("pyarrow_additional_kwargs" , [{"types_mapper" : None }])
28- def test_read_deltalake_versioned (path : str , pyarrow_additional_kwargs : dict [str , Any ]) -> None :
72+ def test_read_deltalake_versioned (
73+ path : str , lock_settings : dict [str , Any ], pyarrow_additional_kwargs : dict [str , Any ]
74+ ) -> None :
2975 df = pd .DataFrame ({"c0" : [1 , 2 , 3 ], "c1" : ["foo" , "baz" , "bar" ]})
30- wr .s3 .to_deltalake (path = path , df = df , s3_allow_unsafe_rename = True )
76+ wr .s3 .to_deltalake (path = path , df = df , ** lock_settings )
3177
3278 df2 = wr .s3 .read_deltalake (path = path , pyarrow_additional_kwargs = pyarrow_additional_kwargs )
3379 assert df2 .equals (df )
3480
3581 df ["c2" ] = [True , False , True ]
36- wr .s3 .to_deltalake (path = path , df = df , mode = "overwrite" , overwrite_schema = True , s3_allow_unsafe_rename = True )
82+ wr .s3 .to_deltalake (path = path , df = df , mode = "overwrite" , overwrite_schema = True , ** lock_settings )
3783
3884 df3 = wr .s3 .read_deltalake (path = path , version = 0 , pyarrow_additional_kwargs = pyarrow_additional_kwargs )
3985 assert df3 .equals (df .drop ("c2" , axis = 1 ))
@@ -42,9 +88,9 @@ def test_read_deltalake_versioned(path: str, pyarrow_additional_kwargs: dict[str
4288 assert df4 .equals (df )
4389
4490
45- def test_read_deltalake_partitions (path : str ) -> None :
91+ def test_read_deltalake_partitions (path : str , lock_settings : dict [ str , Any ] ) -> None :
4692 df = pd .DataFrame ({"c0" : [1 , 2 , 3 ], "c1" : [True , False , True ], "par0" : ["foo" , "foo" , "bar" ], "par1" : [1 , 2 , 2 ]})
47- wr .s3 .to_deltalake (path = path , df = df , partition_cols = ["par0" , "par1" ], s3_allow_unsafe_rename = True )
93+ wr .s3 .to_deltalake (path = path , df = df , partition_cols = ["par0" , "par1" ], ** lock_settings )
4894
4995 df2 = wr .s3 .read_deltalake (path = path , columns = ["c0" ], partitions = [("par0" , "=" , "foo" ), ("par1" , "=" , "1" )])
5096 assert df2 .shape == (1 , 1 )
0 commit comments