# pylint:disable=redefined-outer-name

from datetime import date
-from typing import Optional
+from typing import Iterator, Optional

import pyarrow as pa
import pyarrow.parquet as pq
@@ -122,8 +122,13 @@ def _create_table(
    return tbl


+@pytest.fixture(name="format_version", params=[pytest.param(1, id="format_version=1"), pytest.param(2, id="format_version=2")])
+def format_version_fixture(request: pytest.FixtureRequest) -> Iterator[int]:
+    """Fixture to run tests with different table format versions."""
+    yield request.param
+
+
@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_unpartitioned_table(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
    identifier = f"default.unpartitioned_table_v{format_version}"
    tbl = _create_table(session_catalog, identifier, format_version)
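The fixture above replaces the per-test parametrize markers removed throughout the rest of this diff: because it is registered with name="format_version", pytest injects it into every test that declares a format_version argument, and the explicit pytest.param ids keep node ids readable (e.g. test_add_files_to_unpartitioned_table[format_version=2]). A minimal, self-contained sketch of the pattern, illustrative only and not part of this diff:

import pytest

@pytest.fixture(name="format_version", params=[1, 2])
def format_version_fixture(request: pytest.FixtureRequest) -> int:
    # Each test that requests `format_version` runs once per param.
    return request.param

def test_example(format_version: int) -> None:
    # Collected as test_example[1] and test_example[2].
    assert format_version in (1, 2)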
@@ -163,7 +168,6 @@ def test_add_files_to_unpartitioned_table(spark: SparkSession, session_catalog:


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_unpartitioned_table_raises_file_not_found(
    spark: SparkSession, session_catalog: Catalog, format_version: int
) -> None:
@@ -184,7 +188,6 @@ def test_add_files_to_unpartitioned_table_raises_file_not_found(


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_unpartitioned_table_raises_has_field_ids(
    spark: SparkSession, session_catalog: Catalog, format_version: int
) -> None:
@@ -205,7 +208,6 @@ def test_add_files_to_unpartitioned_table_raises_has_field_ids(


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_unpartitioned_table_with_schema_updates(
    spark: SparkSession, session_catalog: Catalog, format_version: int
) -> None:
@@ -263,7 +265,6 @@ def test_add_files_to_unpartitioned_table_with_schema_updates(


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_partitioned_table(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
    identifier = f"default.partitioned_table_v{format_version}"

@@ -335,7 +336,6 @@ def test_add_files_to_partitioned_table(spark: SparkSession, session_catalog: Ca


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_bucket_partitioned_table_fails(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
    identifier = f"default.partitioned_table_bucket_fails_v{format_version}"

@@ -378,7 +378,6 @@ def test_add_files_to_bucket_partitioned_table_fails(spark: SparkSession, sessio


@pytest.mark.integration
-@pytest.mark.parametrize("format_version", [1, 2])
def test_add_files_to_partitioned_table_fails_with_lower_and_upper_mismatch(
    spark: SparkSession, session_catalog: Catalog, format_version: int
) -> None:
@@ -424,3 +423,28 @@ def test_add_files_to_partitioned_table_fails_with_lower_and_upper_mismatch(
        "Cannot infer partition value from parquet metadata as there are more than one partition values for Partition Field: baz. lower_value=123, upper_value=124"
        in str(exc_info.value)
    )
+
+
+@pytest.mark.integration
+def test_add_files_snapshot_properties(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
+    identifier = f"default.unpartitioned_table_v{format_version}"
+    tbl = _create_table(session_catalog, identifier, format_version)
+
+    file_paths = [f"s3://warehouse/default/unpartitioned/v{format_version}/test-{i}.parquet" for i in range(5)]
+    # write parquet files
+    for file_path in file_paths:
+        fo = tbl.io.new_output(file_path)
+        with fo.create(overwrite=True) as fos:
+            with pq.ParquetWriter(fos, schema=ARROW_SCHEMA) as writer:
+                writer.write_table(ARROW_TABLE)
+
+    # add the parquet files as data files
+    tbl.add_files(file_paths=file_paths, snapshot_properties={"snapshot_prop_a": "test_prop_a"})
+
+    # NameMapping must have been set to enable reads
+    assert tbl.name_mapping() is not None
+
+    summary = spark.sql(f"SELECT * FROM {identifier}.snapshots;").collect()[0].summary
+
+    assert "snapshot_prop_a" in summary
+    assert summary["snapshot_prop_a"] == "test_prop_a"
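The new test verifies the snapshot properties through Spark's `.snapshots` metadata table. For reference, a sketch of the same check made purely through pyiceberg (hypothetical, not part of this diff, and assuming Summary supports mapping-style access to its properties):

# Hypothetical follow-up check: read the summary of the newly created
# snapshot back through pyiceberg instead of Spark.
snapshot = tbl.current_snapshot()
assert snapshot is not None and snapshot.summary is not None
# Properties passed via `snapshot_properties` land in the snapshot summary.
assert snapshot.summary["snapshot_prop_a"] == "test_prop_a"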