Skip to content

Commit b774eec

Browse files
Gautam Parai (gparai)
authored and committed
DRILL-7227: Fix predicate check in DrillRelOptUtil.analyzeSimpleEquiJoin
closes #1775
1 parent e5e9b35 commit b774eec

File tree

3 files changed

+53
-23
lines changed

3 files changed

+53
-23
lines changed

exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -678,10 +678,16 @@ public Void visitCall(RexCall call) {
678678
super.visitCall(call);
679679
} else {
680680
if (call.getKind() == SqlKind.EQUALS) {
681-
int leftFieldCount = join.getLeft().getRowType().getFieldCount();
682-
int rightFieldCount = join.getRight().getRowType().getFieldCount();
683681
RexNode leftComparand = call.operands.get(0);
684682
RexNode rightComparand = call.operands.get(1);
683+
// If a join condition predicate has something more complicated than a RexInputRef
684+
// we bail out!
685+
if (!(leftComparand instanceof RexInputRef && rightComparand instanceof RexInputRef)) {
686+
joinConditions.clear();
687+
throw new Util.FoundOne(call);
688+
}
689+
int leftFieldCount = join.getLeft().getRowType().getFieldCount();
690+
int rightFieldCount = join.getRight().getRowType().getFieldCount();
685691
RexInputRef leftFieldAccess = (RexInputRef) leftComparand;
686692
RexInputRef rightFieldAccess = (RexInputRef) rightComparand;
687693
if (leftFieldAccess.getIndex() >= leftFieldCount + rightFieldCount ||

exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,12 @@
5555
import org.apache.drill.metastore.ColumnStatistics;
5656
import org.apache.drill.metastore.ColumnStatisticsKind;
5757
import org.apache.drill.metastore.TableMetadata;
58+
import org.slf4j.Logger;
59+
import org.slf4j.LoggerFactory;
5860

5961
public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
62+
private static final Logger logger = LoggerFactory.getLogger(DrillRelMdDistinctRowCount.class);
63+
6064
private static final DrillRelMdDistinctRowCount INSTANCE =
6165
new DrillRelMdDistinctRowCount();
6266

@@ -142,10 +146,7 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
142146
if (groupKey.length() == 0) {
143147
return selectivity * rowCount;
144148
}
145-
/* If predicate is present, determine its selectivity to estimate filtered rows. Thereafter,
146-
* compute the number of distinct rows
147-
*/
148-
selectivity = mq.getSelectivity(scan, predicate);
149+
149150
TableMetadata tableMetadata;
150151
try {
151152
tableMetadata = table.getGroupScan().getTableMetadata();
@@ -154,38 +155,43 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
154155
return scan.estimateRowCount(mq) * 0.1;
155156
}
156157

157-
double s = 1.0;
158-
boolean allCols = true;
158+
double estRowCnt = 1.0;
159+
String colName = "";
160+
boolean allColsHaveNDV = true;
159161
for (int i = 0; i < groupKey.length(); i++) {
160-
final String colName = type.getFieldNames().get(i);
161-
// Skip NDV, if not available
162+
colName = type.getFieldNames().get(i);
162163
if (!groupKey.get(i)) {
163-
allCols = false;
164-
break;
164+
continue;
165165
}
166166
ColumnStatistics columnStatistics = tableMetadata != null ?
167167
tableMetadata.getColumnStatistics(SchemaPath.getSimplePath(colName)) : null;
168168
Double ndv = columnStatistics != null ? (Double) columnStatistics.getStatistic(ColumnStatisticsKind.NDV) : null;
169+
// If NDV is not available for a group-by column, stop and fall back to the default estimate
169170
if (ndv == null) {
170-
continue;
171+
allColsHaveNDV = false;
172+
break;
171173
}
172-
s *= ndv;
174+
estRowCnt *= ndv;
173175
selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
174176
/* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
175177
* select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
176178
* whereas NDV(b) will not.
177179
*/
178180
if (selectivity > 0) {
179-
s *= selectivity;
181+
estRowCnt *= selectivity;
180182
}
181183
}
182-
s = Math.min(s, rowCount);
183-
if (!allCols) {
184+
estRowCnt = Math.min(estRowCnt, rowCount);
185+
if (!allColsHaveNDV) {
186+
if (logger.isDebugEnabled()) {
187+
logger.debug(String.format("NDV not available for %s(%s). Using default rowcount for group-by %s",
188+
(tableMetadata != null ? tableMetadata.getTableName() : ""), colName, groupKey.toString()));
189+
}
184190
// Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
185191
return scan.estimateRowCount(mq) * 0.1;
186192
} else {
187193
/* rowCount may be less than NDV (different source), sanity check OR NDV not used at all */
188-
return s;
194+
return estRowCnt;
189195
}
190196
}
191197

@@ -239,18 +245,28 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
239245
if (groupKey.get(idx)) {
240246
// GBY key is present in some filter - now try options A) and B) as described above
241247
double ndvSGby = Double.MAX_VALUE;
248+
Double ndv;
242249
boolean presentInFilter = false;
243250
ImmutableBitSet sGby = getSingleGbyKey(groupKey, idx);
244251
if (sGby != null) {
252+
// If we see any NULL ndv, i.e. we cannot process it, we bail out!
245253
for (ImmutableBitSet jFilter : joinFiltersSet) {
246254
if (jFilter.contains(sGby)) {
247255
presentInFilter = true;
248256
// Found join condition containing this GBY key. Pick min NDV across all columns in this join
249257
for (int fidx : jFilter) {
250258
if (fidx < left.getRowType().getFieldCount()) {
251-
ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred));
259+
ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred);
260+
if (ndv == null) {
261+
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
262+
}
263+
ndvSGby = Math.min(ndvSGby, ndv);
252264
} else {
253-
ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred));
265+
ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred);
266+
if (ndv == null) {
267+
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
268+
}
269+
ndvSGby = Math.min(ndvSGby, ndv);
254270
}
255271
}
256272
break;
@@ -260,9 +276,17 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
260276
if (!presentInFilter) {
261277
for (int sidx : sGby) {
262278
if (sidx < left.getRowType().getFieldCount()) {
263-
ndvSGby = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
279+
ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
280+
if (ndv == null) {
281+
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
282+
}
283+
ndvSGby = ndv;
264284
} else {
265-
ndvSGby = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
285+
ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
286+
if (ndv == null) {
287+
return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
288+
}
289+
ndvSGby = ndv;
266290
}
267291
}
268292
}

exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ public void testUseStatistics() throws Exception {
290290
query = " select emp.employee_id from dfs.tmp.employeeUseStat emp join dfs.tmp.departmentUseStat dept"
291291
+ " on emp.department_id = dept.department_id "
292292
+ " group by emp.employee_id";
293-
String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 115.49475630811243,.*",
293+
String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 730.0992454469841,.*",
294294
"HashJoin\\(condition.*\\).*rowcount = 1155.0,.*",
295295
"Scan.*columns=\\[`department_id`, `employee_id`\\].*rowcount = 1155.0.*",
296296
"Scan.*columns=\\[`department_id`\\].*rowcount = 12.0.*"};

0 commit comments

Comments
 (0)