@@ -2382,10 +2382,122 @@ def arrow_table_date_timestamps() -> "pa.Table":
23822382
23832383
@pytest.fixture(scope="session")
def table_date_timestamps_schema() -> Schema:
    """Iceberg table Schema with only date, timestamp and timestamptz values.

    The three fields are optional (``required=False``) and carry field ids
    1, 2 and 3 in that order. The fixture is session-scoped.
    """
    date_field = NestedField(field_id=1, name="date", field_type=DateType(), required=False)
    timestamp_field = NestedField(field_id=2, name="timestamp", field_type=TimestampType(), required=False)
    timestamptz_field = NestedField(field_id=3, name="timestamptz", field_type=TimestamptzType(), required=False)
    return Schema(date_field, timestamp_field, timestamptz_field)
2392+
2393+
@pytest.fixture(scope="session")
def arrow_table_schema_with_all_timestamp_precisions() -> "pa.Schema":
    """Pyarrow Schema covering every timestamp unit, with and without a timezone.

    Columns come in naive/``UTC`` pairs for the ``s``, ``ms``, ``us`` and ``ns``
    units, followed by three zoned columns whose timezone strings are
    ``Etc/UTC``, ``Z`` and ``+00:00``.
    """
    import pyarrow as pa

    # (column name, unit, timezone); a timezone of None leaves the type naive.
    column_specs = [
        ("timestamp_s", "s", None),
        ("timestamptz_s", "s", "UTC"),
        ("timestamp_ms", "ms", None),
        ("timestamptz_ms", "ms", "UTC"),
        ("timestamp_us", "us", None),
        ("timestamptz_us", "us", "UTC"),
        ("timestamp_ns", "ns", None),
        ("timestamptz_ns", "ns", "UTC"),
        ("timestamptz_us_etc_utc", "us", "Etc/UTC"),
        ("timestamptz_ns_z", "ns", "Z"),
        ("timestamptz_s_0000", "s", "+00:00"),
    ]
    return pa.schema([(name, pa.timestamp(unit, tz=tz)) for name, unit, tz in column_specs])
2412+
2413+
@pytest.fixture(scope="session")
def arrow_table_with_all_timestamp_precisions(arrow_table_schema_with_all_timestamp_precisions: "pa.Schema") -> "pa.Table":
    """Pyarrow table with three rows for each column of the all-precisions schema.

    Every column holds a value, a null, and a second value. The data is first
    assembled as a pandas DataFrame and then converted with the schema supplied
    by the ``arrow_table_schema_with_all_timestamp_precisions`` fixture.
    """
    import pandas as pd
    import pyarrow as pa

    # Second-resolution values shared by most columns.
    naive_rows = [datetime(2023, 1, 1, 19, 25, 0), None, datetime(2023, 3, 1, 19, 25, 0)]
    utc_rows = [
        datetime(2023, 1, 1, 19, 25, 0, tzinfo=timezone.utc),
        None,
        datetime(2023, 3, 1, 19, 25, 0, tzinfo=timezone.utc),
    ]
    # The "+00:00" column uses values one second later than the other zoned columns.
    utc_rows_one_second_later = [
        datetime(2023, 1, 1, 19, 25, 1, tzinfo=timezone.utc),
        None,
        datetime(2023, 3, 1, 19, 25, 1, tzinfo=timezone.utc),
    ]

    # Values with a non-zero nanosecond component; the two rows differ only
    # in the last nanosecond digit (6 vs. 7).
    stamp_parts = {"year": 2024, "month": 7, "day": 11, "hour": 3, "minute": 30, "second": 0, "microsecond": 12}
    nano_naive_rows = [
        pd.Timestamp(**stamp_parts, nanosecond=6),
        None,
        pd.Timestamp(**stamp_parts, nanosecond=7),
    ]
    nano_utc_rows = [
        pd.Timestamp(**stamp_parts, nanosecond=6, tz="UTC"),
        None,
        pd.Timestamp(**stamp_parts, nanosecond=7, tz="UTC"),
    ]

    frame = pd.DataFrame({
        "timestamp_s": naive_rows,
        "timestamptz_s": utc_rows,
        "timestamp_ms": naive_rows,
        "timestamptz_ms": utc_rows,
        "timestamp_us": naive_rows,
        "timestamptz_us": utc_rows,
        "timestamp_ns": nano_naive_rows,
        "timestamptz_ns": utc_rows,
        "timestamptz_us_etc_utc": utc_rows,
        "timestamptz_ns_z": nano_utc_rows,
        "timestamptz_s_0000": utc_rows_one_second_later,
    })
    return pa.Table.from_pandas(frame, schema=arrow_table_schema_with_all_timestamp_precisions)
2466+
2467+
@pytest.fixture(scope="session")
def arrow_table_schema_with_all_microseconds_timestamp_precisions() -> "pa.Schema":
    """Pyarrow Schema in which every timestamp column uses microsecond precision.

    The column names still carry suffixes such as ``_s``, ``_ms`` and ``_ns``,
    but each type is either ``timestamp("us")`` or ``timestamp("us", tz="UTC")``.
    """
    import pyarrow as pa

    # Only two distinct types occur in this schema; build each once.
    naive_us = pa.timestamp(unit="us")
    utc_us = pa.timestamp(unit="us", tz="UTC")

    columns = [
        ("timestamp_s", naive_us),
        ("timestamptz_s", utc_us),
        ("timestamp_ms", naive_us),
        ("timestamptz_ms", utc_us),
        ("timestamp_us", naive_us),
        ("timestamptz_us", utc_us),
        ("timestamp_ns", naive_us),
        ("timestamptz_ns", utc_us),
        ("timestamptz_us_etc_utc", utc_us),
        ("timestamptz_ns_z", utc_us),
        ("timestamptz_s_0000", utc_us),
    ]
    return pa.schema(columns)
2486+
2487+
@pytest.fixture(scope="session")
def table_schema_with_all_microseconds_timestamp_precision() -> Schema:
    """Iceberg table Schema with only timestamp and timestamptz fields.

    There are eleven optional (``required=False``) fields with ids 1 through 11.
    ``timestamp_*`` names use ``TimestampType`` and ``timestamptz_*`` names use
    ``TimestamptzType``.
    """
    # (field name, type constructor), listed in field-id order starting at 1.
    field_specs = (
        ("timestamp_s", TimestampType),
        ("timestamptz_s", TimestamptzType),
        ("timestamp_ms", TimestampType),
        ("timestamptz_ms", TimestamptzType),
        ("timestamp_us", TimestampType),
        ("timestamptz_us", TimestamptzType),
        ("timestamp_ns", TimestampType),
        ("timestamptz_ns", TimestamptzType),
        ("timestamptz_us_etc_utc", TimestamptzType),
        ("timestamptz_ns_z", TimestamptzType),
        ("timestamptz_s_0000", TimestamptzType),
    )
    fields = [
        NestedField(field_id=position, name=field_name, field_type=make_type(), required=False)
        for position, (field_name, make_type) in enumerate(field_specs, start=1)
    ]
    return Schema(*fields)
0 commit comments