5555import org .apache .drill .metastore .ColumnStatistics ;
5656import org .apache .drill .metastore .ColumnStatisticsKind ;
5757import org .apache .drill .metastore .TableMetadata ;
58+ import org .slf4j .Logger ;
59+ import org .slf4j .LoggerFactory ;
5860
5961public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount {
62+ private static final Logger logger = LoggerFactory .getLogger (DrillRelMdDistinctRowCount .class );
63+
6064 private static final DrillRelMdDistinctRowCount INSTANCE =
6165 new DrillRelMdDistinctRowCount ();
6266
@@ -142,10 +146,7 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
142146 if (groupKey .length () == 0 ) {
143147 return selectivity * rowCount ;
144148 }
145- /* If predicate is present, determine its selectivity to estimate filtered rows. Thereafter,
146- * compute the number of distinct rows
147- */
148- selectivity = mq .getSelectivity (scan , predicate );
149+
149150 TableMetadata tableMetadata ;
150151 try {
151152 tableMetadata = table .getGroupScan ().getTableMetadata ();
@@ -154,38 +155,43 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
154155 return scan .estimateRowCount (mq ) * 0.1 ;
155156 }
156157
157- double s = 1.0 ;
158- boolean allCols = true ;
158+ double estRowCnt = 1.0 ;
159+ String colName = "" ;
160+ boolean allColsHaveNDV = true ;
159161 for (int i = 0 ; i < groupKey .length (); i ++) {
160- final String colName = type .getFieldNames ().get (i );
161- // Skip NDV, if not available
162+ colName = type .getFieldNames ().get (i );
162163 if (!groupKey .get (i )) {
163- allCols = false ;
164- break ;
164+ continue ;
165165 }
166166 ColumnStatistics columnStatistics = tableMetadata != null ?
167167 tableMetadata .getColumnStatistics (SchemaPath .getSimplePath (colName )) : null ;
168168 Double ndv = columnStatistics != null ? (Double ) columnStatistics .getStatistic (ColumnStatisticsKind .NDV ) : null ;
169+ // NDV is unavailable for this group-by column: stop here and fall back to the default estimate below
169170 if (ndv == null ) {
170- continue ;
171+ allColsHaveNDV = false ;
172+ break ;
171173 }
172- s *= ndv ;
174+ estRowCnt *= ndv ;
173175 selectivity = getPredSelectivityContainingInputRef (predicate , i , mq , scan );
174176 /* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
175177 * select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
176178 * whereas NDV(b) will not.
177179 */
178180 if (selectivity > 0 ) {
179- s *= selectivity ;
181+ estRowCnt *= selectivity ;
180182 }
181183 }
182- s = Math .min (s , rowCount );
183- if (!allCols ) {
184+ estRowCnt = Math .min (estRowCnt , rowCount );
185+ if (!allColsHaveNDV ) {
186+ if (logger .isDebugEnabled ()) {
187+ logger .debug (String .format ("NDV not available for %s(%s). Using default rowcount for group-by %s" ,
188+ (tableMetadata != null ? tableMetadata .getTableName () : "" ), colName , groupKey .toString ()));
189+ }
184190 // Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
185191 return scan .estimateRowCount (mq ) * 0.1 ;
186192 } else {
187193 /* rowCount maybe less than NDV(different source), sanity check OR NDV not used at all */
188- return s ;
194+ return estRowCnt ;
189195 }
190196 }
191197
@@ -239,18 +245,28 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
239245 if (groupKey .get (idx )) {
240246 // GBY key is present in some filter - now try options A) and B) as described above
241247 double ndvSGby = Double .MAX_VALUE ;
248+ Double ndv ;
242249 boolean presentInFilter = false ;
243250 ImmutableBitSet sGby = getSingleGbyKey (groupKey , idx );
244251 if (sGby != null ) {
252+ // If any NDV lookup returns null (i.e. it cannot be estimated), bail out and defer to the superclass estimate.
245253 for (ImmutableBitSet jFilter : joinFiltersSet ) {
246254 if (jFilter .contains (sGby )) {
247255 presentInFilter = true ;
248256 // Found join condition containing this GBY key. Pick min NDV across all columns in this join
249257 for (int fidx : jFilter ) {
250258 if (fidx < left .getRowType ().getFieldCount ()) {
251- ndvSGby = Math .min (ndvSGby , mq .getDistinctRowCount (left , ImmutableBitSet .of (fidx ), leftPred ));
259+ ndv = mq .getDistinctRowCount (left , ImmutableBitSet .of (fidx ), leftPred );
260+ if (ndv == null ) {
261+ return super .getDistinctRowCount (joinRel , mq , groupKey , predicate );
262+ }
263+ ndvSGby = Math .min (ndvSGby , ndv );
252264 } else {
253- ndvSGby = Math .min (ndvSGby , mq .getDistinctRowCount (right , ImmutableBitSet .of (fidx -left .getRowType ().getFieldCount ()), rightPred ));
265+ ndv = mq .getDistinctRowCount (right , ImmutableBitSet .of (fidx -left .getRowType ().getFieldCount ()), rightPred );
266+ if (ndv == null ) {
267+ return super .getDistinctRowCount (joinRel , mq , groupKey , predicate );
268+ }
269+ ndvSGby = Math .min (ndvSGby , ndv );
254270 }
255271 }
256272 break ;
@@ -260,9 +276,17 @@ private Double getDistinctRowCountInternal(DrillJoinRelBase joinRel, RelMetadata
260276 if (!presentInFilter ) {
261277 for (int sidx : sGby ) {
262278 if (sidx < left .getRowType ().getFieldCount ()) {
263- ndvSGby = mq .getDistinctRowCount (left , ImmutableBitSet .of (sidx ), leftPred );
279+ ndv = mq .getDistinctRowCount (left , ImmutableBitSet .of (sidx ), leftPred );
280+ if (ndv == null ) {
281+ return super .getDistinctRowCount (joinRel , mq , groupKey , predicate );
282+ }
283+ ndvSGby = ndv ;
264284 } else {
265- ndvSGby = mq .getDistinctRowCount (right , ImmutableBitSet .of (sidx -left .getRowType ().getFieldCount ()), rightPred );
285+ ndv = mq .getDistinctRowCount (right , ImmutableBitSet .of (sidx -left .getRowType ().getFieldCount ()), rightPred );
286+ if (ndv == null ) {
287+ return super .getDistinctRowCount (joinRel , mq , groupKey , predicate );
288+ }
289+ ndvSGby = ndv ;
266290 }
267291 }
268292 }
0 commit comments