@@ -368,7 +368,8 @@ def rows_index_with_empty_dataset(
368368) -> Generator [RowsIndex , None , None ]:
369369 with ds_empty_fs .open ("default/train/0000.parquet" ) as f :
370370 with patch ("libcommon.parquet_utils.HTTPFile" , return_value = f ):
371- yield indexer .get_rows_index ("ds_empty" , "default" , "train" )
371+ data_store = f"file://{ ds_empty_fs .local_root_dir } "
372+ yield indexer .get_rows_index ("ds_empty" , "default" , "train" , data_store = data_store )
372373
373374
374375@pytest .fixture
@@ -386,7 +387,8 @@ def rows_index_with_too_big_rows(
386387 )
387388 with ds_sharded_fs .open ("default/train/0003.parquet" ) as f :
388389 with patch ("libcommon.parquet_utils.HTTPFile" , return_value = f ):
389- yield indexer .get_rows_index ("ds_sharded" , "default" , "train" )
390+ data_store = f"file://{ ds_sharded_fs .local_root_dir } "
391+ yield indexer .get_rows_index ("ds_sharded" , "default" , "train" , data_store = data_store )
390392
391393
392394@pytest .fixture
@@ -465,24 +467,18 @@ def test_rows_index_query_with_parquet_metadata(
465467 with pytest .raises (IndexError ):
466468 rows_index_with_parquet_metadata .query (offset = - 1 , length = 2 )
467469
470+ # test the same with page pruning API
471+ import libviewer as lv # type: ignore [import-untyped]
468472
469- def test_rows_index_query_with_page_pruning (rows_index_with_parquet_metadata : RowsIndex , ds_sharded : Dataset ) -> None :
470- from libviewer import Dataset as LibviewerDataset # type: ignore [import-untyped]
471-
472- assert isinstance (rows_index_with_parquet_metadata .viewer_index , LibviewerDataset )
473-
473+ assert isinstance (rows_index_with_parquet_metadata .viewer_index , lv .Dataset )
474474 result = rows_index_with_parquet_metadata .query_with_page_pruning (offset = 1 , length = 3 )
475475 assert result .to_pydict () == ds_sharded [1 :4 ]
476-
477476 result = rows_index_with_parquet_metadata .query_with_page_pruning (offset = 1 , length = 0 )
478477 assert result .to_pydict () == ds_sharded [:0 ]
479-
480478 result = rows_index_with_parquet_metadata .query_with_page_pruning (offset = 999999 , length = 1 )
481479 assert result .to_pydict () == ds_sharded [:0 ]
482-
483480 result = rows_index_with_parquet_metadata .query_with_page_pruning (offset = 1 , length = 99999999 )
484481 assert result .to_pydict () == ds_sharded [1 :]
485-
486482 with pytest .raises (IndexError ):
487483 rows_index_with_parquet_metadata .query_with_page_pruning (offset = 0 , length = - 1 )
488484 with pytest .raises (IndexError ):
def test_rows_index_query_with_too_big_rows(rows_index_with_too_big_rows: RowsIndex) -> None:
    """A shard whose rows exceed the size limit must raise TooBigRows on query."""
    with pytest.raises(TooBigRows):
        rows_index_with_too_big_rows.query(offset=0, length=3)

    # test the same with page pruning API
    with pytest.raises(TooBigRows):
        rows_index_with_too_big_rows.query_with_page_pruning(offset=0, length=2)
496496
def test_rows_index_query_with_empty_dataset(rows_index_with_empty_dataset: RowsIndex, ds_sharded: Dataset) -> None:
    """Querying an empty dataset returns zero rows; negative offsets raise IndexError."""
    assert isinstance(rows_index_with_empty_dataset.parquet_index, ParquetIndexWithMetadata)
    assert rows_index_with_empty_dataset.query(offset=0, length=1).to_pydict() == ds_sharded[:0]
    with pytest.raises(IndexError):
        rows_index_with_empty_dataset.query(offset=-1, length=2)

    # test the same with page pruning API
    import libviewer as lv  # type: ignore[import-untyped]

    assert isinstance(rows_index_with_empty_dataset.viewer_index, lv.Dataset)
    result = rows_index_with_empty_dataset.query_with_page_pruning(offset=0, length=1)
    assert result.to_pydict() == ds_sharded[:0]
    with pytest.raises(IndexError):
        rows_index_with_empty_dataset.query_with_page_pruning(offset=-1, length=2)
503511
504512def test_indexer_schema_mistmatch_error (
505513 indexer : Indexer ,
0 commit comments