8080import org .apache .parquet .io .InputFile ;
8181import org .apache .parquet .io .ParquetDecodingException ;
8282import org .apache .parquet .io .SeekableInputStream ;
83+ import org .apache .parquet .schema .MessageType ;
8384import org .apache .parquet .schema .PrimitiveType ;
8485import org .apache .spark .sql .execution .metric .SQLMetric ;
8586
@@ -578,6 +579,10 @@ private boolean advanceToNextBlock() {
578579 }
579580
580581 public long [] getRowIndices () {
582+ return getRowIndices (blocks );
583+ }
584+
585+ public static long [] getRowIndices (List <BlockMetaData > blocks ) {
581586 long [] rowIndices = new long [blocks .size () * 2 ];
582587 for (int i = 0 , n = blocks .size (); i < n ; i ++) {
583588 BlockMetaData block = blocks .get (i );
@@ -591,7 +596,7 @@ public long[] getRowIndices() {
591596 //
592597 // The reason reflection is used here is that some Spark versions still depend on a
593598 // Parquet version where the method `getRowIndexOffset` is not public.
594- private long getRowIndexOffset (BlockMetaData metaData ) {
599+ public static long getRowIndexOffset (BlockMetaData metaData ) {
595600 try {
596601 Method method = BlockMetaData .class .getMethod ("getRowIndexOffset" );
597602 method .setAccessible (true );
@@ -699,6 +704,35 @@ private static ParquetMetadata readFooter(
699704 }
700705
701706 private List <BlockMetaData > filterRowGroups (List <BlockMetaData > blocks ) {
707+ return filterRowGroups (options , blocks , this );
708+ }
709+
710+ public static List <BlockMetaData > filterRowGroups (
711+ ParquetReadOptions options , List <BlockMetaData > blocks , FileReader fileReader ) {
712+ FilterCompat .Filter recordFilter = options .getRecordFilter ();
713+ if (FilterCompat .isFilteringRequired (recordFilter )) {
714+ // set up data filters based on configured levels
715+ List <RowGroupFilter .FilterLevel > levels = new ArrayList <>();
716+
717+ if (options .useStatsFilter ()) {
718+ levels .add (STATISTICS );
719+ }
720+
721+ if (options .useDictionaryFilter ()) {
722+ levels .add (DICTIONARY );
723+ }
724+
725+ if (options .useBloomFilter ()) {
726+ levels .add (BLOOMFILTER );
727+ }
728+ return RowGroupFilter .filterRowGroups (levels , recordFilter , blocks , fileReader );
729+ }
730+
731+ return blocks ;
732+ }
733+
734+ public static List <BlockMetaData > filterRowGroups (
735+ ParquetReadOptions options , List <BlockMetaData > blocks , MessageType schema ) {
702736 FilterCompat .Filter recordFilter = options .getRecordFilter ();
703737 if (FilterCompat .isFilteringRequired (recordFilter )) {
704738 // set up data filters based on configured levels
@@ -715,7 +749,7 @@ private List<BlockMetaData> filterRowGroups(List<BlockMetaData> blocks) {
715749 if (options .useBloomFilter ()) {
716750 levels .add (BLOOMFILTER );
717751 }
718- return RowGroupFilter .filterRowGroups (levels , recordFilter , blocks , this );
752+ return RowGroupFilter .filterRowGroups (levels , recordFilter , blocks , schema );
719753 }
720754
721755 return blocks ;
0 commit comments