@@ -38,7 +38,7 @@ def run_spark_commands(spark: SparkSession, sqls: List[str]) -> None:
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_delete_full_file(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"

     run_spark_commands(
         spark,
@@ -66,14 +66,14 @@ def test_partitioned_table_delete_full_file(spark: SparkSession, session_catalog
     tbl.delete(EqualTo("number_partitioned", 10))

     # No overwrite operation
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'append', 'delete']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [11, 11], 'number': [20, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "append", "delete"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [11, 11], "number": [20, 30]}


 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_rewrite(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"

     run_spark_commands(
         spark,
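
Context for the hunk above: when the delete predicate covers an entire partition, PyIceberg can drop the affected data files wholesale and commit a pure `delete` snapshot, with no data rewritten. A minimal sketch of that flow, assuming a catalog configured like the `session_catalog` fixture in these tests (the catalog name is hypothetical):

from pyiceberg.catalog import load_catalog
from pyiceberg.expressions import EqualTo

catalog = load_catalog("default")  # hypothetical catalog name
tbl = catalog.load_table("default.table_partitioned_delete")

# The predicate matches every row in partition 10, so the whole data file is dropped
tbl.delete(EqualTo("number_partitioned", 10))

# The last snapshot is a plain `delete`, not an `overwrite`
print([s.summary.operation.value for s in tbl.snapshots()])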
@@ -101,14 +101,14 @@ def test_partitioned_table_rewrite(spark: SparkSession, session_catalog: RestCat
     tbl.delete(EqualTo("number", 20))

     # We don't delete a whole partition, so there is only an overwrite
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'append', 'overwrite']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [11, 10], 'number': [30, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "append", "overwrite"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [11, 10], "number": [30, 30]}


 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"

     run_spark_commands(
         spark,
@@ -132,13 +132,13 @@ def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCa
     tbl = session_catalog.load_table(identifier)
     tbl.delete(EqualTo("number_partitioned", 22))  # Does not affect any data

-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [10, 10], 'number': [20, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10, 10], "number": [20, 30]}


 @pytest.mark.integration
 def test_partitioned_table_positional_deletes(spark: SparkSession, session_catalog: RestCatalog) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"

     run_spark_commands(
         spark,
@@ -180,13 +180,13 @@ def test_partitioned_table_positional_deletes(spark: SparkSession, session_catal

     # One positional delete has been added, but an OVERWRITE status is set
     # https://github.com/apache/iceberg/issues/10122
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'overwrite', 'overwrite']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [10], 'number': [20]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "overwrite", "overwrite"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10], "number": [20]}


 @pytest.mark.integration
 def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSession, session_catalog: RestCatalog) -> None:
-    identifier = 'default.table_partitioned_delete_sequence_number'
+    identifier = "default.table_partitioned_delete_sequence_number"

     # This test case is a bit more complex. Here we run a MoR delete on a file and make sure that
     # the manifest gets rewritten (but not the data file with a MoR), and check if the delete is still there
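
As the comment in the hunk above notes, Spark's merge-on-read DELETE writes a positional delete file but still records the snapshot operation as `overwrite` (apache/iceberg#10122). On the PyIceberg side, the positional deletes are applied at read time, so a plain scan already returns the post-delete rows. A sketch, assuming `tbl` is the table loaded in the test:

# Positional delete files are merged during the scan; no extra flags are needed.
arrow_table = tbl.scan().to_arrow()
assert arrow_table.to_pydict() == {"number_partitioned": [10], "number": [20]}

# The snapshot log still reports `overwrite` for the MoR delete (iceberg#10122)
print([s.summary.operation.value for s in tbl.snapshots()])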
@@ -234,40 +234,40 @@ def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSessio
     assert len(snapshots) == 3

     # Snapshots produced by Spark
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ['append', 'overwrite']
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ["append", "overwrite"]

     # Will rewrite one parquet file
     assert snapshots[2].summary == Summary(
         Operation.OVERWRITE,
         **{
-            'added-files-size': '1145',
-            'added-data-files': '1',
-            'added-records': '2',
-            'changed-partition-count': '1',
-            'total-files-size': snapshots[2].summary['total-files-size'],
-            'total-delete-files': '0',
-            'total-data-files': '1',
-            'total-position-deletes': '0',
-            'total-records': '2',
-            'total-equality-deletes': '0',
-            'deleted-data-files': '2',
-            'removed-delete-files': '1',
-            'deleted-records': '5',
-            'removed-files-size': snapshots[2].summary['removed-files-size'],
-            'removed-position-deletes': '1',
+            "added-files-size": "1145",
+            "added-data-files": "1",
+            "added-records": "2",
+            "changed-partition-count": "1",
+            "total-files-size": snapshots[2].summary["total-files-size"],
+            "total-delete-files": "0",
+            "total-data-files": "1",
+            "total-position-deletes": "0",
+            "total-records": "2",
+            "total-equality-deletes": "0",
+            "deleted-data-files": "2",
+            "removed-delete-files": "1",
+            "deleted-records": "5",
+            "removed-files-size": snapshots[2].summary["removed-files-size"],
+            "removed-position-deletes": "1",
         },
     )

-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [20, 20, 10], 'number': [200, 202, 100]}
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [20, 20, 10], "number": [200, 202, 100]}


 @pytest.mark.integration
 def test_delete_no_match(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
-            {'ints': 3},
+            {"ints": 1},
+            {"ints": 3},
         ],
         schema=arrow_schema,
     )
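
The `Summary` assertion above pins every key, parameterizing only the file sizes that genuinely vary between runs. If exact byte counts such as "1145" ever prove flaky across writer versions, one alternative (a sketch of an option, not what this PR does) is to assert only the keys under test via a small hypothetical helper:

# Hypothetical helper: compare only a chosen subset of summary properties.
def assert_summary_subset(summary, expected):
    for key, value in expected.items():
        assert summary[key] == value, f"{key}: {summary[key]!r} != {value!r}"

assert_summary_subset(
    snapshots[2].summary,
    {"added-data-files": "1", "deleted-data-files": "2", "removed-delete-files": "1"},
)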
@@ -286,7 +286,7 @@ def test_delete_no_match(session_catalog: RestCatalog) -> None:

     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [Operation.APPEND]

-    tbl.delete('ints == 2')  # Only 1 and 3 in the file, but 2 is between the lower and upper bound
+    tbl.delete("ints == 2")  # Only 1 and 3 in the file, but 2 is between the lower and upper bound

     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [Operation.APPEND]
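
The hunk above shows the predicate passed as a string; PyIceberg parses it into the same boolean expression you would build by hand, so the typed form used earlier in this file is equivalent. The delete is a no-op here because 2 falls inside the file's min/max bounds (1 and 3) yet matches no actual row, which is why the snapshot list stays at a single append. A sketch of the two equivalent spellings:

from pyiceberg.expressions import EqualTo

# Both forms express the same predicate; neither matches a row in this file.
tbl.delete("ints == 2")
tbl.delete(EqualTo("ints", 2))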
@@ -296,8 +296,8 @@ def test_delete_overwrite(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
-            {'ints': 2},
+            {"ints": 1},
+            {"ints": 2},
         ],
         schema=arrow_schema,
     )
@@ -318,28 +318,28 @@ def test_delete_overwrite(session_catalog: RestCatalog) -> None:

     arrow_tbl_overwrite = pa.Table.from_pylist(
         [
-            {'ints': 3},
-            {'ints': 4},
+            {"ints": 3},
+            {"ints": 4},
         ],
         schema=arrow_schema,
     )
-    tbl.overwrite(arrow_tbl_overwrite, 'ints == 2')  # Should rewrite one file
+    tbl.overwrite(arrow_tbl_overwrite, "ints == 2")  # Should rewrite one file

     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [
         Operation.APPEND,
         Operation.OVERWRITE,
         Operation.APPEND,
     ]

-    assert tbl.scan().to_arrow()['ints'].to_pylist() == [3, 4, 1]
+    assert tbl.scan().to_arrow()["ints"].to_pylist() == [3, 4, 1]


 @pytest.mark.integration
 def test_delete_truncate(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
+            {"ints": 1},
         ],
         schema=arrow_schema,
     )
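
Taken together, these tests walk the full append / delete / overwrite cycle that the PR touches. A condensed sketch of the pattern they exercise (the catalog name and table identifier are hypothetical, and `create_table` accepting a PyArrow schema assumes a recent PyIceberg release):

import pyarrow as pa
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")  # hypothetical catalog name
schema = pa.schema([pa.field("ints", pa.int32())])
df = pa.Table.from_pylist([{"ints": 1}, {"ints": 2}], schema=schema)

tbl = catalog.create_table("default.delete_sketch", schema=schema)  # hypothetical identifier
tbl.append(df)                  # snapshot 1: append
tbl.delete("ints == 1")         # snapshot 2: delete or overwrite, depending on file layout
tbl.overwrite(df, "ints == 2")  # removes matching rows, then appends the new data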