 # specific language governing permissions and limitations
 # under the License.
 import gzip
-import os
+from pathlib import Path

 import numpy as np
 import pyarrow as pa
@@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path):
     )
     write_csv(table, path)

-    with open(path, "rb") as csv_file:
-        with gzip.open(gzip_path, "wb") as gzipped_file:
-            gzipped_file.writelines(csv_file)
+    with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file:
+        gzipped_file.writelines(csv_file)

     ctx.register_csv("csv", path)
     ctx.register_csv("csv1", str(path))
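The hunk above folds the nested `with` blocks into a single statement and reads the source file through pathlib. A minimal standalone sketch of that gzip-compression pattern, with hypothetical file names in place of the tmp_path fixtures (`src.open("rb")` is equivalent to the `Path.open(path, "rb")` spelling used in the diff):

import gzip
from pathlib import Path

# Compress an existing file into a .gz sibling; both handles are managed
# by one `with` statement, so they are closed even if writelines() raises.
src = Path("test.csv")      # hypothetical input
dst = Path("test.csv.gz")   # hypothetical output

with src.open("rb") as plain_file, gzip.open(dst, "wb") as gzipped_file:
    gzipped_file.writelines(plain_file)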
@@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path):
     assert result.to_pydict() == {"cnt": [100]}


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
     dir_root = tmp_path / "dataset_parquet_partitioned"
     dir_root.mkdir(exist_ok=False)
@@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
     assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1}


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_register_dataset(ctx, tmp_path, path_to_str):
     path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data())
     path = str(path) if path_to_str else path
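These two hunks only change the parametrize value container from a tuple to a list; behavior is identical. A small sketch of how such parametrize decorators behave when stacked (the test name and the second parameter axis are made up for illustration), since the same stacking appears further down for file_sort_order, pass_schema and path_to_str:

import pytest

@pytest.mark.parametrize("path_to_str", [True, False])
@pytest.mark.parametrize("use_gzip", [True, False])  # hypothetical second axis
def test_cross_product(path_to_str, use_gzip):
    # Stacked decorators yield the cross product: 2 x 2 = 4 invocations.
    assert isinstance(path_to_str, bool)
    assert isinstance(use_gzip, bool)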
@@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str):


 def test_register_json(ctx, tmp_path):
-    path = os.path.dirname(os.path.abspath(__file__))
-    test_data_path = os.path.join(path, "data_test_context", "data.json")
+    path = Path(__file__).parent.resolve()
+    test_data_path = Path(path) / "data_test_context" / "data.json"
     gzip_path = tmp_path / "data.json.gz"

-    with open(test_data_path, "rb") as json_file:
-        with gzip.open(gzip_path, "wb") as gzipped_file:
-            gzipped_file.writelines(json_file)
+    with (
+        Path.open(test_data_path, "rb") as json_file,
+        gzip.open(gzip_path, "wb") as gzipped_file
+    ):
+        gzipped_file.writelines(json_file)

     ctx.register_json("json", test_data_path)
     ctx.register_json("json1", str(test_data_path))
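A standalone sketch of the two idioms this hunk adopts: resolving a data file relative to the test module with pathlib, and grouping several context managers inside parentheses, which needs Python 3.10 or newer. The relative path mirrors the fixture used above; the output name is arbitrary here.

import gzip
from pathlib import Path

# Locate the JSON fixture next to this module, then gzip it.
test_data_path = Path(__file__).parent.resolve() / "data_test_context" / "data.json"
gzip_path = Path("data.json.gz")  # hypothetical output location

with (
    test_data_path.open("rb") as json_file,
    gzip.open(gzip_path, "wb") as gzipped_file,
):
    gzipped_file.writelines(json_file)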
@@ -470,16 +471,19 @@ def test_simple_select(ctx, tmp_path, arr):
     # In DF 43.0.0 we now default to having BinaryView and StringView
     # so the array that is saved to the parquet is slightly different
     # than the array read. Convert to values for comparison.
-    if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray):
+    if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)):
         arr = arr.tolist()
         result = result.tolist()

     np.testing.assert_equal(result, arr)


-@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]]))
-@pytest.mark.parametrize("pass_schema", (True, False))
-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("file_sort_order", [
+    None,
+    [[col("int").sort(ascending=True, nulls_first=True)]]
+])
+@pytest.mark.parametrize("pass_schema", [True, False])
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_register_listing_table(
     ctx, tmp_path, pass_schema, file_sort_order, path_to_str
 ):
@@ -528,7 +532,7 @@ def test_register_listing_table(
     assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2}

     result = ctx.sql(
-        "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp"
+        "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp"  # noqa: E501
     ).collect()
     result = pa.Table.from_batches(result)
