70
70
NamespaceNotEmptyError ,
71
71
NoSuchIcebergTableError ,
72
72
NoSuchNamespaceError ,
73
+ NoSuchPropertyException ,
73
74
NoSuchTableError ,
74
75
TableAlreadyExistsError ,
75
76
WaitingForLockException ,
76
77
)
77
- from pyiceberg .io import FileIO , load_file_io
78
78
from pyiceberg .partitioning import UNPARTITIONED_PARTITION_SPEC , PartitionSpec
79
79
from pyiceberg .schema import Schema , SchemaVisitor , visit
80
80
from pyiceberg .serializers import FromInputFile
81
81
from pyiceberg .table import (
82
82
CommitTableRequest ,
83
83
CommitTableResponse ,
84
84
PropertyUtil ,
85
+ StagedTable ,
85
86
Table ,
86
87
TableProperties ,
87
- update_table_metadata ,
88
88
)
89
- from pyiceberg .table .metadata import new_table_metadata
90
89
from pyiceberg .table .sorting import UNSORTED_SORT_ORDER , SortOrder
91
90
from pyiceberg .typedef import EMPTY_DICT , Identifier , Properties
92
91
from pyiceberg .types import (
@@ -272,10 +271,12 @@ def __init__(self, name: str, **properties: str):
272
271
DEFAULT_LOCK_CHECK_RETRIES ,
273
272
)
274
273
275
- def _convert_hive_into_iceberg (self , table : HiveTable , io : FileIO ) -> Table :
274
+ def _convert_hive_into_iceberg (self , table : HiveTable ) -> Table :
276
275
properties : Dict [str , str ] = table .parameters
277
276
if TABLE_TYPE not in properties :
278
- raise NoSuchTableError (f"Property table_type missing, could not determine type: { table .dbName } .{ table .tableName } " )
277
+ raise NoSuchPropertyException (
278
+ f"Property table_type missing, could not determine type: { table .dbName } .{ table .tableName } "
279
+ )
279
280
280
281
table_type = properties [TABLE_TYPE ]
281
282
if table_type .lower () != ICEBERG :
@@ -286,8 +287,9 @@ def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table:
286
287
if prop_metadata_location := properties .get (METADATA_LOCATION ):
287
288
metadata_location = prop_metadata_location
288
289
else :
289
- raise NoSuchTableError (f"Table property { METADATA_LOCATION } is missing" )
290
+ raise NoSuchPropertyException (f"Table property { METADATA_LOCATION } is missing" )
290
291
292
+ io = self ._load_file_io (location = metadata_location )
291
293
file = io .new_input (metadata_location )
292
294
metadata = FromInputFile .table_metadata (file )
293
295
return Table (
@@ -298,6 +300,38 @@ def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table:
298
300
catalog = self ,
299
301
)
300
302
303
def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
    """Build the Hive metastore table object that mirrors an Iceberg table.

    Args:
        table: Iceberg table whose identifier, schema, location, properties
            and metadata location are copied into the Hive table.

    Returns:
        A HiveTable of type EXTERNAL_TABLE. Its owner is the table's OWNER
        property when present, otherwise the current OS user, and both
        createTime and lastAccessTime are set to the current time in seconds.
    """
    database_name, table_name = self.identifier_to_database_and_table(
        self.identifier_to_tuple_without_catalog(table.identifier), NoSuchTableError
    )
    # Derived from a millisecond timestamp, then truncated to whole seconds.
    now_seconds = int(time.time() * 1000) // 1000

    table_properties = table.properties
    if table_properties and OWNER in table_properties:
        owner = table_properties[OWNER]
    else:
        # No explicit owner on the table: fall back to the current user.
        owner = getpass.getuser()

    storage_descriptor = _construct_hive_storage_descriptor(
        table.schema(),
        table.location(),
        PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
    )
    hive_parameters = _construct_parameters(table.metadata_location)

    return HiveTable(
        dbName=database_name,
        tableName=table_name,
        owner=owner,
        createTime=now_seconds,
        lastAccessTime=now_seconds,
        sd=storage_descriptor,
        tableType=EXTERNAL_TABLE,
        parameters=hive_parameters,
    )
322
+
323
def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None:
    """Create ``hive_table`` through an already-open metastore client.

    Args:
        open_client: Client used to issue the create call.
        hive_table: Hive table definition to create.

    Raises:
        TableAlreadyExistsError: If the client raises AlreadyExistsException.
    """
    try:
        open_client.create_table(hive_table)
    except AlreadyExistsException as already_exists:
        qualified_name = f"{hive_table.dbName}.{hive_table.tableName}"
        raise TableAlreadyExistsError(f"Table {qualified_name} already exists") from already_exists
328
+
329
def _get_hive_table(self, open_client: Client, database_name: str, table_name: str) -> HiveTable:
    """Fetch a table definition through an already-open metastore client.

    Args:
        open_client: Client used to issue the lookup.
        database_name: Name of the database passed to the client as ``dbname``.
        table_name: Name of the table passed to the client as ``tbl_name``.

    Returns:
        Whatever the client's ``get_table`` call returns for the given names.

    Raises:
        NoSuchTableError: If the client raises NoSuchObjectException.
    """
    try:
        hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name)
    except NoSuchObjectException as missing:
        raise NoSuchTableError(f"Table does not exists: {table_name}") from missing
    return hive_table
334
+
301
335
def create_table (
302
336
self ,
303
337
identifier : Union [str , Identifier ],
@@ -324,45 +358,25 @@ def create_table(
324
358
AlreadyExistsError: If a table with the name already exists.
325
359
ValueError: If the identifier is invalid.
326
360
"""
327
- schema : Schema = self ._convert_schema_if_needed (schema ) # type: ignore
328
-
329
361
properties = {** DEFAULT_PROPERTIES , ** properties }
330
- database_name , table_name = self .identifier_to_database_and_table (identifier )
331
- current_time_millis = int (time .time () * 1000 )
332
-
333
- location = self ._resolve_table_location (location , database_name , table_name )
334
-
335
- metadata_location = self ._get_metadata_location (location = location )
336
- metadata = new_table_metadata (
337
- location = location ,
362
+ staged_table = self ._create_staged_table (
363
+ identifier = identifier ,
338
364
schema = schema ,
365
+ location = location ,
339
366
partition_spec = partition_spec ,
340
367
sort_order = sort_order ,
341
368
properties = properties ,
342
369
)
343
- io = load_file_io ({** self .properties , ** properties }, location = location )
344
- self ._write_metadata (metadata , io , metadata_location )
370
+ database_name , table_name = self .identifier_to_database_and_table (identifier )
345
371
346
- tbl = HiveTable (
347
- dbName = database_name ,
348
- tableName = table_name ,
349
- owner = properties [OWNER ] if properties and OWNER in properties else getpass .getuser (),
350
- createTime = current_time_millis // 1000 ,
351
- lastAccessTime = current_time_millis // 1000 ,
352
- sd = _construct_hive_storage_descriptor (
353
- schema , location , PropertyUtil .property_as_bool (self .properties , HIVE2_COMPATIBLE , HIVE2_COMPATIBLE_DEFAULT )
354
- ),
355
- tableType = EXTERNAL_TABLE ,
356
- parameters = _construct_parameters (metadata_location ),
357
- )
358
- try :
359
- with self ._client as open_client :
360
- open_client .create_table (tbl )
361
- hive_table = open_client .get_table (dbname = database_name , tbl_name = table_name )
362
- except AlreadyExistsException as e :
363
- raise TableAlreadyExistsError (f"Table { database_name } .{ table_name } already exists" ) from e
372
+ self ._write_metadata (staged_table .metadata , staged_table .io , staged_table .metadata_location )
373
+ tbl = self ._convert_iceberg_into_hive (staged_table )
374
+
375
+ with self ._client as open_client :
376
+ self ._create_hive_table (open_client , tbl )
377
+ hive_table = open_client .get_table (dbname = database_name , tbl_name = table_name )
364
378
365
- return self ._convert_hive_into_iceberg (hive_table , io )
379
+ return self ._convert_hive_into_iceberg (hive_table )
366
380
367
381
def register_table (self , identifier : Union [str , Identifier ], metadata_location : str ) -> Table :
368
382
"""Register a new table using existing metadata.
@@ -437,36 +451,52 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
437
451
else :
438
452
raise CommitFailedException (f"Failed to acquire lock for { table_request .identifier } , state: { lock .state } " )
439
453
440
- hive_table = open_client .get_table (dbname = database_name , tbl_name = table_name )
441
- io = load_file_io ({** self .properties , ** hive_table .parameters }, hive_table .sd .location )
442
- current_table = self ._convert_hive_into_iceberg (hive_table , io )
443
-
444
- base_metadata = current_table .metadata
445
- for requirement in table_request .requirements :
446
- requirement .validate (base_metadata )
447
-
448
- updated_metadata = update_table_metadata (base_metadata , table_request .updates )
449
- if updated_metadata == base_metadata :
454
+ hive_table : Optional [HiveTable ]
455
+ current_table : Optional [Table ]
456
+ try :
457
+ hive_table = self ._get_hive_table (open_client , database_name , table_name )
458
+ current_table = self ._convert_hive_into_iceberg (hive_table )
459
+ except NoSuchTableError :
460
+ hive_table = None
461
+ current_table = None
462
+
463
+ updated_staged_table = self ._update_and_stage_table (current_table , table_request )
464
+ if current_table and updated_staged_table .metadata == current_table .metadata :
450
465
# no changes, do nothing
451
- return CommitTableResponse (metadata = base_metadata , metadata_location = current_table .metadata_location )
452
-
453
- # write new metadata
454
- new_metadata_version = self ._parse_metadata_version (current_table .metadata_location ) + 1
455
- new_metadata_location = self ._get_metadata_location (current_table .metadata .location , new_metadata_version )
456
- self ._write_metadata (updated_metadata , current_table .io , new_metadata_location )
457
-
458
- hive_table .parameters = _construct_parameters (
459
- metadata_location = new_metadata_location , previous_metadata_location = current_table .metadata_location
466
+ return CommitTableResponse (metadata = current_table .metadata , metadata_location = current_table .metadata_location )
467
+ self ._write_metadata (
468
+ metadata = updated_staged_table .metadata ,
469
+ io = updated_staged_table .io ,
470
+ metadata_path = updated_staged_table .metadata_location ,
460
471
)
461
- open_client .alter_table (dbname = database_name , tbl_name = table_name , new_tbl = hive_table )
462
- except NoSuchObjectException as e :
463
- raise NoSuchTableError (f"Table does not exist: { table_name } " ) from e
472
+
473
+ if hive_table and current_table :
474
+ # Table exists, update it.
475
+ hive_table .parameters = _construct_parameters (
476
+ metadata_location = updated_staged_table .metadata_location ,
477
+ previous_metadata_location = current_table .metadata_location ,
478
+ )
479
+ open_client .alter_table (dbname = database_name , tbl_name = table_name , new_tbl = hive_table )
480
+ else :
481
+ # Table does not exist, create it.
482
+ hive_table = self ._convert_iceberg_into_hive (
483
+ StagedTable (
484
+ identifier = (self .name , database_name , table_name ),
485
+ metadata = updated_staged_table .metadata ,
486
+ metadata_location = updated_staged_table .metadata_location ,
487
+ io = updated_staged_table .io ,
488
+ catalog = self ,
489
+ )
490
+ )
491
+ self ._create_hive_table (open_client , hive_table )
464
492
except WaitingForLockException as e :
465
493
raise CommitFailedException (f"Failed to acquire lock for { table_request .identifier } , state: { lock .state } " ) from e
466
494
finally :
467
495
open_client .unlock (UnlockRequest (lockid = lock .lockid ))
468
496
469
- return CommitTableResponse (metadata = updated_metadata , metadata_location = new_metadata_location )
497
+ return CommitTableResponse (
498
+ metadata = updated_staged_table .metadata , metadata_location = updated_staged_table .metadata_location
499
+ )
470
500
471
501
def load_table (self , identifier : Union [str , Identifier ]) -> Table :
472
502
"""Load the table's metadata and return the table instance.
@@ -485,14 +515,11 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
485
515
"""
486
516
identifier_tuple = self .identifier_to_tuple_without_catalog (identifier )
487
517
database_name , table_name = self .identifier_to_database_and_table (identifier_tuple , NoSuchTableError )
488
- try :
489
- with self ._client as open_client :
490
- hive_table = open_client .get_table (dbname = database_name , tbl_name = table_name )
491
- except NoSuchObjectException as e :
492
- raise NoSuchTableError (f"Table does not exists: { table_name } " ) from e
493
518
494
- io = load_file_io ({** self .properties , ** hive_table .parameters }, hive_table .sd .location )
495
- return self ._convert_hive_into_iceberg (hive_table , io )
519
+ with self ._client as open_client :
520
+ hive_table = self ._get_hive_table (open_client , database_name , table_name )
521
+
522
+ return self ._convert_hive_into_iceberg (hive_table )
496
523
497
524
def drop_table (self , identifier : Union [str , Identifier ]) -> None :
498
525
"""Drop a table.
0 commit comments