20
20
from copy import copy
21
21
from dataclasses import dataclass
22
22
from enum import Enum
23
- from typing import TYPE_CHECKING , Dict , List , Optional , Set , Tuple , Union
23
+ from typing import TYPE_CHECKING , Any , Dict , List , Optional , Set , Tuple , Union
24
24
25
25
from pyiceberg .exceptions import ResolveError , ValidationError
26
+ from pyiceberg .expressions import literal # type: ignore
26
27
from pyiceberg .schema import (
27
28
PartnerAccessor ,
28
29
Schema ,
47
48
UpdatesAndRequirements ,
48
49
UpdateTableMetadata ,
49
50
)
51
+ from pyiceberg .typedef import L
50
52
from pyiceberg .types import IcebergType , ListType , MapType , NestedField , PrimitiveType , StructType
51
53
52
54
if TYPE_CHECKING :
@@ -153,7 +155,12 @@ def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema:
153
155
return self
154
156
155
157
def add_column (
156
- self , path : Union [str , Tuple [str , ...]], field_type : IcebergType , doc : Optional [str ] = None , required : bool = False
158
+ self ,
159
+ path : Union [str , Tuple [str , ...]],
160
+ field_type : IcebergType ,
161
+ doc : Optional [str ] = None ,
162
+ required : bool = False ,
163
+ default_value : Optional [L ] = None ,
157
164
) -> UpdateSchema :
158
165
"""Add a new column to a nested struct or Add a new top-level column.
159
166
@@ -168,6 +175,7 @@ def add_column(
168
175
field_type: Type for the new column.
169
176
doc: Documentation string for the new column.
170
177
required: Whether the new column is required.
178
+ default_value: Default value for the new column.
171
179
172
180
Returns:
173
181
This for method chaining.
@@ -177,10 +185,6 @@ def add_column(
177
185
raise ValueError (f"Cannot add column with ambiguous name: { path } , provide a tuple instead" )
178
186
path = (path ,)
179
187
180
- if required and not self ._allow_incompatible_changes :
181
- # Table format version 1 and 2 cannot add required column because there is no initial value
182
- raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
183
-
184
188
name = path [- 1 ]
185
189
parent = path [:- 1 ]
186
190
@@ -212,13 +216,34 @@ def add_column(
212
216
213
217
# assign new IDs in order
214
218
new_id = self .assign_new_column_id ()
219
+ new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
220
+
221
+ if default_value is not None :
222
+ try :
223
+ # To make sure that the value is valid for the type
224
+ initial_default = literal (default_value ).to (new_type ).value
225
+ except ValueError as e :
226
+ raise ValueError (f"Invalid default value: { e } " ) from e
227
+ else :
228
+ initial_default = default_value # type: ignore
229
+
230
+ if (required and initial_default is None ) and not self ._allow_incompatible_changes :
231
+ # Table format version 1 and 2 cannot add required column because there is no initial value
232
+ raise ValueError (f"Incompatible change: cannot add required column: { '.' .join (path )} " )
215
233
216
234
# update tracking for moves
217
235
self ._added_name_to_id [full_name ] = new_id
218
236
self ._id_to_parent [new_id ] = parent_full_path
219
237
220
- new_type = assign_fresh_schema_ids (field_type , self .assign_new_column_id )
221
- field = NestedField (field_id = new_id , name = name , field_type = new_type , required = required , doc = doc )
238
+ field = NestedField (
239
+ field_id = new_id ,
240
+ name = name ,
241
+ field_type = new_type ,
242
+ required = required ,
243
+ doc = doc ,
244
+ initial_default = initial_default ,
245
+ write_default = initial_default ,
246
+ )
222
247
223
248
if parent_id in self ._adds :
224
249
self ._adds [parent_id ].append (field )
@@ -250,6 +275,19 @@ def delete_column(self, path: Union[str, Tuple[str, ...]]) -> UpdateSchema:
250
275
251
276
return self
252
277
278
def set_default_value(self, path: Union[str, Tuple[str, ...]], default_value: Optional[L]) -> UpdateSchema:
    """Set the default value of a column.

    Only the write default of the column is changed; the staged field keeps
    the initial default it already had.

    Args:
        path: The path to the column.
        default_value: The new default value for the column. None stages a
            write default of None.

    Returns:
        This for method chaining.
    """
    # Validation and staging of the change are delegated to the private helper.
    self._set_column_default_value(path, default_value)

    return self
290
+
253
291
def rename_column (self , path_from : Union [str , Tuple [str , ...]], new_name : str ) -> UpdateSchema :
254
292
"""Update the name of a column.
255
293
@@ -273,6 +311,8 @@ def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) -
273
311
field_type = updated .field_type ,
274
312
doc = updated .doc ,
275
313
required = updated .required ,
314
+ initial_default = updated .initial_default ,
315
+ write_default = updated .write_default ,
276
316
)
277
317
else :
278
318
self ._updates [field_from .field_id ] = NestedField (
@@ -281,6 +321,8 @@ def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) -
281
321
field_type = field_from .field_type ,
282
322
doc = field_from .doc ,
283
323
required = field_from .required ,
324
+ initial_default = field_from .initial_default ,
325
+ write_default = field_from .write_default ,
284
326
)
285
327
286
328
# Lookup the field because of casing
@@ -330,6 +372,8 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
330
372
field_type = updated .field_type ,
331
373
doc = updated .doc ,
332
374
required = required ,
375
+ initial_default = updated .initial_default ,
376
+ write_default = updated .write_default ,
333
377
)
334
378
else :
335
379
self ._updates [field .field_id ] = NestedField (
@@ -338,6 +382,52 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
338
382
field_type = field .field_type ,
339
383
doc = field .doc ,
340
384
required = required ,
385
+ initial_default = field .initial_default ,
386
+ write_default = field .write_default ,
387
+ )
388
+
389
def _set_column_default_value(self, path: Union[str, Tuple[str, ...]], default_value: Any) -> None:
    """Stage a change of the write default of an existing column.

    The column is looked up in the current schema by its dotted name. A
    non-None value is converted to the column's type so that invalid values
    are rejected before anything is staged. The staged field keeps its
    initial default; only the write default is replaced.

    Args:
        path: Column name, or a tuple of name parts that is joined with "."
            to form the full column name.
        default_value: The new write default. None stages a write default
            of None.

    Raises:
        ValueError: If the value cannot be converted to the column's type,
            if a required column's default is changed to None while
            incompatible changes are not allowed, or if the column is
            already staged for deletion.
    """
    path = (path,) if isinstance(path, str) else path
    name = ".".join(path)

    field = self._schema.find_field(name, self._case_sensitive)

    if default_value is not None:
        try:
            # To make sure that the value is valid for the type
            default_value = literal(default_value).to(field.field_type).value
        except ValueError as e:
            raise ValueError(f"Invalid default value: {e}") from e

    if field.required and default_value == field.write_default:
        # if the change is a noop, allow it even if allowIncompatibleChanges is false
        return

    if not self._allow_incompatible_changes and field.required and default_value is None:
        raise ValueError("Cannot change default-value of a required column to None")

    if field.field_id in self._deletes:
        raise ValueError(f"Cannot update a column that will be deleted: {name}")

    # Build on an update that is already staged for this column, if any, so
    # earlier staged changes (name, type, doc, required) are not lost;
    # otherwise start from the field as it is in the current schema.
    base = self._updates.get(field.field_id) or field
    self._updates[field.field_id] = NestedField(
        field_id=base.field_id,
        name=base.name,
        field_type=base.field_type,
        doc=base.doc,
        required=base.required,
        initial_default=base.initial_default,
        write_default=default_value,
    )
342
432
343
433
def update_column (
@@ -387,6 +477,8 @@ def update_column(
387
477
field_type = field_type or updated .field_type ,
388
478
doc = doc if doc is not None else updated .doc ,
389
479
required = updated .required ,
480
+ initial_default = updated .initial_default ,
481
+ write_default = updated .write_default ,
390
482
)
391
483
else :
392
484
self ._updates [field .field_id ] = NestedField (
@@ -395,6 +487,8 @@ def update_column(
395
487
field_type = field_type or field .field_type ,
396
488
doc = doc if doc is not None else field .doc ,
397
489
required = field .required ,
490
+ initial_default = field .initial_default ,
491
+ write_default = field .write_default ,
398
492
)
399
493
400
494
if required is not None :
@@ -636,19 +730,35 @@ def struct(self, struct: StructType, field_results: List[Optional[IcebergType]])
636
730
name = field .name
637
731
doc = field .doc
638
732
required = field .required
733
+ write_default = field .write_default
639
734
640
735
# There is an update
641
736
if update := self ._updates .get (field .field_id ):
642
737
name = update .name
643
738
doc = update .doc
644
739
required = update .required
645
-
646
- if field .name == name and field .field_type == result_type and field .required == required and field .doc == doc :
740
+ write_default = update .write_default
741
+
742
+ if (
743
+ field .name == name
744
+ and field .field_type == result_type
745
+ and field .required == required
746
+ and field .doc == doc
747
+ and field .write_default == write_default
748
+ ):
647
749
new_fields .append (field )
648
750
else :
649
751
has_changes = True
650
752
new_fields .append (
651
- NestedField (field_id = field .field_id , name = name , field_type = result_type , required = required , doc = doc )
753
+ NestedField (
754
+ field_id = field .field_id ,
755
+ name = name ,
756
+ field_type = result_type ,
757
+ required = required ,
758
+ doc = doc ,
759
+ initial_default = field .initial_default ,
760
+ write_default = write_default ,
761
+ )
652
762
)
653
763
654
764
if has_changes :
0 commit comments