Skip to content

Commit e5e9b35

Browse files
committed
DRILL-7240: Catch runtime pruning filter-match exceptions and do not prune these rowgroups
closes #1783
1 parent 3603f24 commit e5e9b35

File tree

1 file changed

+26
-11
lines changed

1 file changed

+26
-11
lines changed

exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetScanBatchCreator.java

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,8 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
107107
long totalPruneTime = 0;
108108
long totalRowgroups = rowGroupScan.getRowGroupReadEntries().size();
109109
Stopwatch pruneTimer = Stopwatch.createUnstarted();
110+
int countMatchClassCastExceptions = 0; // in case match() hits CCE, count and report these
111+
String matchCastErrorMessage = ""; // message of the last CCE, reported together with the count (initialized here because it is otherwise assigned only inside a catch block)
110112

111113
// If pruning - Prepare the predicate and the columns before the FOR LOOP
112114
if ( doRuntimePruning ) {
@@ -182,22 +184,31 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
182184
Map<SchemaPath, ColumnStatistics> columnsStatistics = ParquetTableMetadataUtils.getRowGroupColumnStatistics(tableMetadataV4, rowGroupMetadata);
183185

184186
//
185-
// Perform the Run-Time Pruning - i.e. Skip this rowgroup if the match fails
187+
// Perform the Run-Time Pruning - i.e. Skip/prune this rowgroup if the match fails
186188
//
187-
RowsMatch match = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
188-
189-
// collect logging info
190-
long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
189+
RowsMatch matchResult = RowsMatch.ALL; // default (in case of exception) - do not prune this rowgroup
190+
try {
191+
matchResult = FilterEvaluatorUtils.matches(filterPredicate, columnsStatistics, footerRowCount);
192+
193+
// collect logging info
194+
long timeToRead = pruneTimer.elapsed(TimeUnit.MICROSECONDS);
195+
totalPruneTime += timeToRead;
196+
logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
197+
matchResult == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
198+
} catch (ClassCastException cce) {
199+
countMatchClassCastExceptions++; // one more CCE occurred
200+
matchCastErrorMessage = cce.getMessage(); // report the (last) error message
201+
} catch (Exception e) {
202+
// in case some unexpected exception is raised
203+
logger.warn("Run-time pruning check failed - {}. Skip pruning rowgroup - {}", e.getMessage(), rowGroup.getPath());
204+
}
191205
pruneTimer.stop();
192206
pruneTimer.reset();
193-
totalPruneTime += timeToRead;
194-
logger.trace("Run-time pruning: {} row-group {} (RG index: {} row count: {}), took {} usec", // trace each single rowgroup
195-
match == RowsMatch.NONE ? "Excluded" : "Included", rowGroup.getPath(), rowGroupIndex, footerRowCount, timeToRead);
196207

197-
// If this rowgroup failed the match - skip it
198-
if (match == RowsMatch.NONE) {
208+
// If this rowgroup failed the match - skip it (i.e., no reader for this rowgroup)
209+
if (matchResult == RowsMatch.NONE) {
199210
rowgroupsPruned++; // one more RG was pruned
200-
if (firstRowGroup == null) { // keep first RG, to be used in case all row groups are pruned
211+
if (firstRowGroup == null) { // keep the first RG, to be used in case all row groups are pruned
201212
firstRowGroup = rowGroup;
202213
firstFooter = footer;
203214
}
@@ -214,10 +225,14 @@ protected ScanBatch getBatch(ExecutorFragmentContext context, AbstractParquetRow
214225
mapWithMaxColumns = createReaderAndImplicitColumns(context, rowGroupScan, oContext, columnExplorer, readers, implicitColumns, mapWithMaxColumns, firstRowGroup, fs,
215226
firstFooter, true);
216227
}
228+
// do some logging, if relevant
217229
if ( totalPruneTime > 0 ) {
218230
logger.info("Finished parquet_runtime_pruning in {} usec. Out of given {} rowgroups, {} were pruned. {}", totalPruneTime, totalRowgroups, rowgroupsPruned,
219231
totalRowgroups == rowgroupsPruned ? "ALL_PRUNED !!" : "");
220232
}
233+
if ( countMatchClassCastExceptions > 0 ) {
234+
logger.info("Run-time pruning skipped for {} out of {} rowgroups due to: {}",countMatchClassCastExceptions, totalRowgroups, matchCastErrorMessage);
235+
}
221236

222237
// Update stats (same in every reader - the others would just overwrite the stats)
223238
for (CommonParquetRecordReader rr : readers ) {

0 commit comments

Comments
 (0)