15
15
# specific language governing permissions and limitations
16
16
# under the License.
17
17
import getpass
18
+ import logging
18
19
import socket
19
20
import time
20
21
from types import TracebackType
33
34
from hive_metastore .ThriftHiveMetastore import Client
34
35
from hive_metastore .ttypes import (
35
36
AlreadyExistsException ,
37
+ CheckLockRequest ,
36
38
FieldSchema ,
37
39
InvalidOperationException ,
38
40
LockComponent ,
49
51
)
50
52
from hive_metastore .ttypes import Database as HiveDatabase
51
53
from hive_metastore .ttypes import Table as HiveTable
54
+ from tenacity import retry , retry_if_exception_type , stop_after_attempt , wait_exponential
52
55
from thrift .protocol import TBinaryProtocol
53
56
from thrift .transport import TSocket , TTransport
54
57
69
72
NoSuchNamespaceError ,
70
73
NoSuchTableError ,
71
74
TableAlreadyExistsError ,
75
+ WaitingForLockException ,
72
76
)
73
77
from pyiceberg .io import FileIO , load_file_io
74
78
from pyiceberg .partitioning import UNPARTITIONED_PARTITION_SPEC , PartitionSpec
75
79
from pyiceberg .schema import Schema , SchemaVisitor , visit
76
80
from pyiceberg .serializers import FromInputFile
77
- from pyiceberg .table import CommitTableRequest , CommitTableResponse , PropertyUtil , Table , TableProperties , update_table_metadata
81
+ from pyiceberg .table import (
82
+ CommitTableRequest ,
83
+ CommitTableResponse ,
84
+ PropertyUtil ,
85
+ Table ,
86
+ TableProperties ,
87
+ update_table_metadata ,
88
+ )
78
89
from pyiceberg .table .metadata import new_table_metadata
79
90
from pyiceberg .table .sorting import UNSORTED_SORT_ORDER , SortOrder
80
91
from pyiceberg .typedef import EMPTY_DICT , Identifier , Properties
111
122
# Catalog property toggling Hive 2 compatibility, and its default.
HIVE2_COMPATIBLE = "hive.hive2-compatible"
HIVE2_COMPATIBLE_DEFAULT = False

# Catalog properties that tune how a WAITING metastore lock is polled.
# Each property key is followed by the default used when the key is absent.
LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"
DEFAULT_LOCK_CHECK_MIN_WAIT_TIME = 0.1  # 100 milliseconds

LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time"
DEFAULT_LOCK_CHECK_MAX_WAIT_TIME = 60  # 1 min

LOCK_CHECK_RETRIES = "lock-check-retries"
DEFAULT_LOCK_CHECK_RETRIES = 4

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
133
+
114
134
115
135
class _HiveClient :
116
136
"""Helper class to nicely open and close the transport."""
@@ -240,6 +260,18 @@ def __init__(self, name: str, **properties: str):
240
260
super ().__init__ (name , ** properties )
241
261
self ._client = _HiveClient (properties ["uri" ], properties .get ("ugi" ))
242
262
263
+ self ._lock_check_min_wait_time = PropertyUtil .property_as_float (
264
+ properties , LOCK_CHECK_MIN_WAIT_TIME , DEFAULT_LOCK_CHECK_MIN_WAIT_TIME
265
+ )
266
+ self ._lock_check_max_wait_time = PropertyUtil .property_as_float (
267
+ properties , LOCK_CHECK_MAX_WAIT_TIME , DEFAULT_LOCK_CHECK_MAX_WAIT_TIME
268
+ )
269
+ self ._lock_check_retries = PropertyUtil .property_as_float (
270
+ properties ,
271
+ LOCK_CHECK_RETRIES ,
272
+ DEFAULT_LOCK_CHECK_RETRIES ,
273
+ )
274
+
243
275
def _convert_hive_into_iceberg (self , table : HiveTable , io : FileIO ) -> Table :
244
276
properties : Dict [str , str ] = table .parameters
245
277
if TABLE_TYPE not in properties :
@@ -356,6 +388,26 @@ def _create_lock_request(self, database_name: str, table_name: str) -> LockReque
356
388
357
389
return lock_request
358
390
391
def _wait_for_lock(self, database_name: str, table_name: str, lockid: int, open_client: Client) -> LockResponse:
    """Poll the metastore until the lock ``lockid`` reports ACQUIRED.

    Every poll issues ``check_lock`` on ``open_client``. While the lock is
    still WAITING, the poll is retried with exponential backoff bounded by
    ``self._lock_check_min_wait_time`` / ``self._lock_check_max_wait_time``
    and stopped after ``self._lock_check_retries`` attempts.

    Args:
        database_name: Database of the locked table (used in messages only).
        table_name: Name of the locked table (used in messages only).
        lockid: Identifier of the lock to check.
        open_client: Client on which ``check_lock`` is called.

    Returns:
        The response of the first check whose state is ACQUIRED.

    Raises:
        WaitingForLockException: The lock was still WAITING on the final
            attempt (the retry policy re-raises the last error).
        CommitFailedException: The lock reported a state other than
            ACQUIRED or WAITING; this is not retried.
    """

    def _do_wait_for_lock() -> LockResponse:
        lock_status: LockResponse = open_client.check_lock(CheckLockRequest(lockid=lockid))

        if lock_status.state == LockState.ACQUIRED:
            return lock_status

        # Anything that is neither ACQUIRED nor WAITING ends the polling immediately.
        if lock_status.state != LockState.WAITING:
            raise CommitFailedException(f"Failed to check lock for {database_name}.{table_name}, state: {lock_status.state}")

        # Still queued: log it and signal the retry policy to try again.
        msg = f"Wait on lock for {database_name}.{table_name}"
        logger.warning(msg)
        raise WaitingForLockException(msg)

    # Only the "still waiting" signal triggers another attempt.
    retry_policy = retry(
        retry=retry_if_exception_type(WaitingForLockException),
        wait=wait_exponential(multiplier=2, min=self._lock_check_min_wait_time, max=self._lock_check_max_wait_time),
        stop=stop_after_attempt(self._lock_check_retries),
        reraise=True,
    )
    return retry_policy(_do_wait_for_lock)()
410
+
359
411
def _commit_table (self , table_request : CommitTableRequest ) -> CommitTableResponse :
360
412
"""Update the table.
361
413
@@ -380,7 +432,10 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
380
432
381
433
try :
382
434
if lock .state != LockState .ACQUIRED :
383
- raise CommitFailedException (f"Failed to acquire lock for { table_request .identifier } , state: { lock .state } " )
435
+ if lock .state == LockState .WAITING :
436
+ self ._wait_for_lock (database_name , table_name , lock .lockid , open_client )
437
+ else :
438
+ raise CommitFailedException (f"Failed to acquire lock for { table_request .identifier } , state: { lock .state } " )
384
439
385
440
hive_table = open_client .get_table (dbname = database_name , tbl_name = table_name )
386
441
io = load_file_io ({** self .properties , ** hive_table .parameters }, hive_table .sd .location )
@@ -406,6 +461,8 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
406
461
open_client .alter_table (dbname = database_name , tbl_name = table_name , new_tbl = hive_table )
407
462
except NoSuchObjectException as e :
408
463
raise NoSuchTableError (f"Table does not exist: { table_name } " ) from e
464
+ except WaitingForLockException as e :
465
+ raise CommitFailedException (f"Failed to acquire lock for { table_request .identifier } , state: { lock .state } " ) from e
409
466
finally :
410
467
open_client .unlock (UnlockRequest (lockid = lock .lockid ))
411
468
0 commit comments