47
47
import boto3
48
48
import pytest
49
49
from moto import mock_aws
50
+ from pydantic_core import to_json
50
51
51
52
from pyiceberg .catalog import Catalog , load_catalog
52
53
from pyiceberg .catalog .noop import NoopCatalog
67
68
)
68
69
from pyiceberg .io .fsspec import FsspecFileIO
69
70
from pyiceberg .manifest import DataFile , FileFormat
71
+ from pyiceberg .partitioning import PartitionField , PartitionSpec
70
72
from pyiceberg .schema import Accessor , Schema
71
73
from pyiceberg .serializers import ToOutputFile
72
74
from pyiceberg .table import FileScanTask , Table
73
75
from pyiceberg .table .metadata import TableMetadataV1 , TableMetadataV2
76
+ from pyiceberg .transforms import DayTransform , IdentityTransform
74
77
from pyiceberg .types import (
75
78
BinaryType ,
76
79
BooleanType ,
@@ -1255,8 +1258,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
1255
1258
{"key" : 15 , "value" : 0 },
1256
1259
],
1257
1260
"lower_bounds" : [
1258
- {"key" : 2 , "value" : b"2020-04-01 00:00 " },
1259
- {"key" : 3 , "value" : b"2020-04-01 00:12 " },
1261
+ {"key" : 2 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1262
+ {"key" : 3 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1260
1263
{"key" : 7 , "value" : b"\x03 \x00 \x00 \x00 " },
1261
1264
{"key" : 8 , "value" : b"\x01 \x00 \x00 \x00 " },
1262
1265
{"key" : 10 , "value" : b"\xf6 (\\ \x8f \xc2 \x05 S\xc0 " },
@@ -1270,8 +1273,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
1270
1273
{"key" : 19 , "value" : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 \xc0 " },
1271
1274
],
1272
1275
"upper_bounds" : [
1273
- {"key" : 2 , "value" : b"2020-04-30 23:5: " },
1274
- {"key" : 3 , "value" : b"2020-05-01 00:41 " },
1276
+ {"key" : 2 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1277
+ {"key" : 3 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1275
1278
{"key" : 7 , "value" : b"\t \x01 \x00 \x00 " },
1276
1279
{"key" : 8 , "value" : b"\t \x01 \x00 \x00 " },
1277
1280
{"key" : 10 , "value" : b"\xcd \xcc \xcc \xcc \xcc ,_@" },
@@ -1376,8 +1379,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
1376
1379
],
1377
1380
"lower_bounds" : [
1378
1381
{"key" : 1 , "value" : b"\x01 \x00 \x00 \x00 " },
1379
- {"key" : 2 , "value" : b"2020-04-01 00:00 " },
1380
- {"key" : 3 , "value" : b"2020-04-01 00:03 " },
1382
+ {"key" : 2 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1383
+ {"key" : 3 , "value" : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1381
1384
{"key" : 4 , "value" : b"\x00 \x00 \x00 \x00 " },
1382
1385
{"key" : 5 , "value" : b"\x01 \x00 \x00 \x00 " },
1383
1386
{"key" : 6 , "value" : b"N" },
@@ -1396,8 +1399,8 @@ def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str:
1396
1399
],
1397
1400
"upper_bounds" : [
1398
1401
{"key" : 1 , "value" : b"\x01 \x00 \x00 \x00 " },
1399
- {"key" : 2 , "value" : b"2020-04-30 23:5: " },
1400
- {"key" : 3 , "value" : b"2020-05-01 00:1: " },
1402
+ {"key" : 2 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1403
+ {"key" : 3 , "value" : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " },
1401
1404
{"key" : 4 , "value" : b"\x06 \x00 \x00 \x00 " },
1402
1405
{"key" : 5 , "value" : b"c\x00 \x00 \x00 " },
1403
1406
{"key" : 6 , "value" : b"Y" },
@@ -1858,15 +1861,40 @@ def simple_map() -> MapType:
1858
1861
1859
1862
1860
1863
@pytest.fixture(scope="session")
def test_schema() -> Schema:
    """Build the two-field schema shared by the manifest fixtures.

    The schema holds an integer field ``VendorID`` (id 1) and a timestamp
    field ``tpep_pickup_datetime`` (id 2). Both fields pass ``False`` as the
    fourth positional argument (presumably ``required`` -- confirm against
    ``NestedField``).

    Returns:
        The ``Schema`` built from the two fields, created once per session.
    """
    vendor_id_field = NestedField(1, "VendorID", IntegerType(), False)
    pickup_datetime_field = NestedField(2, "tpep_pickup_datetime", TimestampType(), False)
    return Schema(vendor_id_field, pickup_datetime_field)
1868
+
1869
+
1870
@pytest.fixture(scope="session")
def test_partition_spec() -> PartitionSpec:
    """Build the partition spec that accompanies ``test_schema``.

    The spec has two partition fields:

    * an identity-transform field named ``VendorID`` (ids 1 / 1000), and
    * a day-transform field named ``tpep_pickup_day`` (ids 2 / 1001).

    The leading ids 1 and 2 match the field ids used in ``test_schema``
    (presumably the source column ids -- confirm against ``PartitionField``).

    Returns:
        The ``PartitionSpec`` built from the two fields, created once per
        session.
    """
    # The return annotation previously said ``Schema``; the value returned
    # here is a ``PartitionSpec``, which is also how consumers annotate it.
    return PartitionSpec(
        PartitionField(1, 1000, IdentityTransform(), "VendorID"),
        PartitionField(2, 1001, DayTransform(), "tpep_pickup_day"),
    )
1876
+
1877
+
1878
@pytest.fixture(scope="session")
def generated_manifest_entry_file(
    avro_schema_manifest_entry: Dict[str, Any], test_schema: Schema, test_partition_spec: PartitionSpec
) -> Generator[str, None, None]:
    """Write ``manifest_entry_records`` to a temporary Avro file and yield its path.

    The file is written with fastavro using the parsed
    ``avro_schema_manifest_entry`` schema. Two entries are attached as file
    metadata: ``"schema"`` (the JSON dump of ``test_schema``) and
    ``"partition-spec"`` (the JSON-encoded fields of ``test_partition_spec``).

    Yields:
        Path of ``manifest.avro`` inside a temporary directory. The directory
        is cleaned up when the fixture is torn down, so the path is only
        valid while the fixture is active.
    """
    from fastavro import parse_schema, writer

    avro_schema = parse_schema(avro_schema_manifest_entry)

    with TemporaryDirectory() as workdir:
        manifest_path = f"{workdir}/manifest.avro"
        with open(manifest_path, "wb") as sink:
            file_metadata = {
                "schema": test_schema.model_dump_json(),
                "partition-spec": to_json(test_partition_spec.fields).decode("utf-8"),
            }
            writer(sink, avro_schema, manifest_entry_records, metadata=file_metadata)
        # Yield only after the file handle is closed so readers see a fully
        # flushed file, but while the temporary directory still exists.
        yield manifest_path
1871
1899
1872
1900
0 commit comments