@@ -107,6 +107,8 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
107107 long totalPruneTime = 0 ;
108108 long totalRowgroups = rowGroupScan .getRowGroupReadEntries ().size ();
109109 Stopwatch pruneTimer = Stopwatch .createUnstarted ();
110+ int countMatchClassCastExceptions = 0 ; // in case match() hits CCE, count and report these
111+ String matchCastErrorMessage = "" ; // report the error too (Java insists on initializing this ....)
110112
111113 // If pruning - Prepare the predicate and the columns before the FOR LOOP
112114 if ( doRuntimePruning ) {
@@ -182,22 +184,31 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
182184 Map <SchemaPath , ColumnStatistics > columnsStatistics = ParquetTableMetadataUtils .getRowGroupColumnStatistics (tableMetadataV4 , rowGroupMetadata );
183185
184186 //
185- // Perform the Run-Time Pruning - i.e. Skip this rowgroup if the match fails
187+ // Perform the Run-Time Pruning - i.e. Skip/prune this rowgroup if the match fails
186188 //
187- RowsMatch match = FilterEvaluatorUtils .matches (filterPredicate , columnsStatistics , footerRowCount );
188-
189- // collect logging info
190- long timeToRead = pruneTimer .elapsed (TimeUnit .MICROSECONDS );
189+ RowsMatch matchResult = RowsMatch .ALL ; // default (in case of exception) - do not prune this rowgroup
190+ try {
191+ matchResult = FilterEvaluatorUtils .matches (filterPredicate , columnsStatistics , footerRowCount );
192+
193+ // collect logging info
194+ long timeToRead = pruneTimer .elapsed (TimeUnit .MICROSECONDS );
195+ totalPruneTime += timeToRead ;
196+ logger .trace ("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec" , // trace each single rowgroup
197+ matchResult == RowsMatch .NONE ? "Excluded" : "Included" , rowGroup .getPath (), rowGroupIndex , footerRowCount , timeToRead );
198+ } catch (ClassCastException cce ) {
199+ countMatchClassCastExceptions ++; // one more CCE occurred
200+ matchCastErrorMessage = cce .getMessage (); // report the (last) error message
201+ } catch (Exception e ) {
202+ // in case some unexpected exception is raised
203+ logger .warn ("Run-time pruning check failed - {}. Skip pruning rowgroup - {}" , e .getMessage (), rowGroup .getPath ());
204+ }
191205 pruneTimer .stop ();
192206 pruneTimer .reset ();
193- totalPruneTime += timeToRead ;
194- logger .trace ("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec" , // trace each single rowgroup
195- match == RowsMatch .NONE ? "Excluded" : "Included" , rowGroup .getPath (), rowGroupIndex , footerRowCount , timeToRead );
196207
197- // If this rowgroup failed the match - skip it
198- if (match == RowsMatch .NONE ) {
208+ // If this rowgroup failed the match - skip it (i.e., no reader for this rowgroup)
209+ if (matchResult == RowsMatch .NONE ) {
199210 rowgroupsPruned ++; // one more RG was pruned
200- if (firstRowGroup == null ) { // keep first RG, to be used in case all row groups are pruned
211+ if (firstRowGroup == null ) { // keep the first RG, to be used in case all row groups are pruned
201212 firstRowGroup = rowGroup ;
202213 firstFooter = footer ;
203214 }
@@ -214,10 +225,14 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
214225 mapWithMaxColumns = createReaderAndImplicitColumns (context , rowGroupScan , oContext , columnExplorer , readers , implicitColumns , mapWithMaxColumns , firstRowGroup , fs ,
215226 firstFooter , true );
216227 }
228+ // do some logging, if relevant
217229 if ( totalPruneTime > 0 ) {
218230 logger .info ("Finished parquet_runtime_pruning in {} usec. Out of given {} rowgroups, {} were pruned. {}" , totalPruneTime , totalRowgroups , rowgroupsPruned ,
219231 totalRowgroups == rowgroupsPruned ? "ALL_PRUNED !!" : "" );
220232 }
233+ if ( countMatchClassCastExceptions > 0 ) {
234+ logger .info ("Run-time pruning skipped for {} out of {} rowgroups due to: {}" ,countMatchClassCastExceptions , totalRowgroups , matchCastErrorMessage );
235+ }
221236
222237 // Update stats (same in every reader - the others would just overwrite the stats)
223238 for (CommonParquetRecordReader rr : readers ) {
0 commit comments