 
 import adbc_driver_manager.dbapi
 import numpy as np
-import pyarrow
 import pytest
 
 import adbc_driver_duckdb.dbapi
 
+if sys.version_info >= (3, 13):
+    pytest.skip("Pyarrow not available on Python 3.13+", allow_module_level=True)
+else:
+    import pyarrow as pa
+
 xfail = pytest.mark.xfail
 driver_path = adbc_driver_duckdb.driver_path()
 
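The added lines above gate the `pyarrow` import on the interpreter version and skip the whole test module when it is unavailable. Read outside the diff, the pattern is just the following minimal sketch, which repeats the added lines and assumes `import sys` already appears above the hunk shown here:

import sys

import pytest

if sys.version_info >= (3, 13):
    # Skip every test in this module at collection time instead of
    # failing on the pyarrow import.
    pytest.skip("Pyarrow not available on Python 3.13+", allow_module_level=True)
else:
    # All tests below refer to pyarrow through the short "pa" alias.
    import pyarrow as pa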
@@ -20,7 +24,7 @@ def duck_conn():
 
 
 def example_table():
-    return pyarrow.table(
+    return pa.table(
         [
             [1, 2, 3, 4],
             ["a", "b", None, "d"],
@@ -145,7 +149,7 @@ def test_connection_get_table_schema(duck_conn):
     with duck_conn.cursor() as cursor:
         # Test Default Schema
         cursor.execute("CREATE TABLE tableschema (ints BIGINT)")
-        assert duck_conn.adbc_get_table_schema("tableschema") == pyarrow.schema(
+        assert duck_conn.adbc_get_table_schema("tableschema") == pa.schema(
             [
                 ("ints", "int64"),
             ]
@@ -154,12 +158,12 @@ def test_connection_get_table_schema(duck_conn):
         # Test Given Schema
         cursor.execute("CREATE SCHEMA test;")
         cursor.execute("CREATE TABLE test.tableschema (test_ints BIGINT)")
-        assert duck_conn.adbc_get_table_schema("tableschema", db_schema_filter="test") == pyarrow.schema(
+        assert duck_conn.adbc_get_table_schema("tableschema", db_schema_filter="test") == pa.schema(
             [
                 ("test_ints", "int64"),
             ]
         )
-        assert duck_conn.adbc_get_table_schema("tableschema") == pyarrow.schema(
+        assert duck_conn.adbc_get_table_schema("tableschema") == pa.schema(
             [
                 ("ints", "int64"),
             ]
@@ -175,14 +179,14 @@ def test_connection_get_table_schema(duck_conn):
         # Catalog and DB Schema name
         assert duck_conn.adbc_get_table_schema(
             "tableschema", catalog_filter="memory", db_schema_filter="test"
-        ) == pyarrow.schema(
+        ) == pa.schema(
             [
                 ("test_ints", "int64"),
             ]
         )
 
         # DB Schema is inferred to be "main" if unspecified
-        assert duck_conn.adbc_get_table_schema("tableschema", catalog_filter="memory") == pyarrow.schema(
+        assert duck_conn.adbc_get_table_schema("tableschema", catalog_filter="memory") == pa.schema(
             [
                 ("ints", "int64"),
             ]
@@ -285,19 +289,19 @@ def test_large_chunk(tmp_path):
     chunk_size = 10_000
 
     # Create data for each chunk
-    chunks_col1 = [pyarrow.array(np.random.randint(0, 100, chunk_size)) for _ in range(num_chunks)]
-    chunks_col2 = [pyarrow.array(np.random.rand(chunk_size)) for _ in range(num_chunks)]
+    chunks_col1 = [pa.array(np.random.randint(0, 100, chunk_size)) for _ in range(num_chunks)]
+    chunks_col2 = [pa.array(np.random.rand(chunk_size)) for _ in range(num_chunks)]
     chunks_col3 = [
-        pyarrow.array([f"str_{i}" for i in range(j * chunk_size, (j + 1) * chunk_size)]) for j in range(num_chunks)
+        pa.array([f"str_{i}" for i in range(j * chunk_size, (j + 1) * chunk_size)]) for j in range(num_chunks)
     ]
 
     # Create chunked arrays
-    col1 = pyarrow.chunked_array(chunks_col1)
-    col2 = pyarrow.chunked_array(chunks_col2)
-    col3 = pyarrow.chunked_array(chunks_col3)
+    col1 = pa.chunked_array(chunks_col1)
+    col2 = pa.chunked_array(chunks_col2)
+    col3 = pa.chunked_array(chunks_col3)
 
     # Create the table
-    table = pyarrow.table([col1, col2, col3], names=["ints", "floats", "strings"])
+    table = pa.table([col1, col2, col3], names=["ints", "floats", "strings"])
 
     db = Path(tmp_path) / "tmp.db"
     if db.exists():
@@ -320,11 +324,11 @@ def test_large_chunk(tmp_path):
 def test_dictionary_data(tmp_path):
     data = ["apple", "banana", "apple", "orange", "banana", "banana"]
 
-    dict_type = pyarrow.dictionary(index_type=pyarrow.int32(), value_type=pyarrow.string())
-    dict_array = pyarrow.array(data, type=dict_type)
+    dict_type = pa.dictionary(index_type=pa.int32(), value_type=pa.string())
+    dict_array = pa.array(data, type=dict_type)
 
     # Wrap in a table
-    table = pyarrow.table({"fruits": dict_array})
+    table = pa.table({"fruits": dict_array})
     db = Path(tmp_path) / "tmp.db"
     if db.exists():
         db.unlink()
@@ -346,12 +350,12 @@ def test_dictionary_data(tmp_path):
 
 
 def test_ree_data(tmp_path):
-    run_ends = pyarrow.array([3, 5, 6], type=pyarrow.int32())  # positions: [0-2], [3-4], [5]
-    values = pyarrow.array(["apple", "banana", "orange"], type=pyarrow.string())
+    run_ends = pa.array([3, 5, 6], type=pa.int32())  # positions: [0-2], [3-4], [5]
+    values = pa.array(["apple", "banana", "orange"], type=pa.string())
 
-    ree_array = pyarrow.RunEndEncodedArray.from_arrays(run_ends, values)
+    ree_array = pa.RunEndEncodedArray.from_arrays(run_ends, values)
 
-    table = pyarrow.table({"fruits": ree_array})
+    table = pa.table({"fruits": ree_array})
 
     db = Path(tmp_path) / "tmp.db"
     if db.exists():
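For context on how these pyarrow tables are exercised, the ingest calls themselves sit outside the hunks shown above. The sketch below shows the usual round trip through the DuckDB ADBC driver; it assumes the generic driver-manager DBAPI shim together with the `duckdb_adbc_init` entrypoint documented for DuckDB's ADBC support, so treat the connection arguments as illustrative rather than as the fixture this test file actually uses.

import adbc_driver_manager.dbapi
import pyarrow as pa

import adbc_driver_duckdb

# Dictionary-encoded column, built exactly as in test_dictionary_data above.
data = ["apple", "banana", "apple", "orange", "banana", "banana"]
dict_type = pa.dictionary(index_type=pa.int32(), value_type=pa.string())
table = pa.table({"fruits": pa.array(data, type=dict_type)})

# Assumed wiring: driver path from this file plus the entrypoint documented for
# DuckDB's ADBC support; the tests in this file use their own connection setup.
with adbc_driver_manager.dbapi.connect(
    driver=adbc_driver_duckdb.driver_path(), entrypoint="duckdb_adbc_init"
) as conn:
    with conn.cursor() as cursor:
        cursor.adbc_ingest("fruits", table)  # CREATE TABLE + bulk load of the Arrow data
        cursor.execute("SELECT count(*) FROM fruits")
        assert cursor.fetchone()[0] == len(data)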