Skip to content

Commit fbfcc16

Browse files
924060929kaka11chen
authored andcommitted
only support prune olapTable/iceberg/hive(parquet/orc)
1 parent 613ed21 commit fbfcc16

File tree

11 files changed

+185
-42
lines changed

11 files changed

+185
-42
lines changed

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,15 @@
5858
import org.apache.doris.nereids.exceptions.NotSupportedException;
5959
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
6060
import org.apache.doris.qe.GlobalVariable;
61+
import org.apache.doris.qe.SessionVariable;
6162
import org.apache.doris.statistics.AnalysisInfo;
6263
import org.apache.doris.statistics.BaseAnalysisTask;
6364
import org.apache.doris.statistics.ColumnStatistic;
6465
import org.apache.doris.statistics.ColumnStatisticBuilder;
6566
import org.apache.doris.statistics.HMSAnalysisTask;
6667
import org.apache.doris.statistics.StatsType;
6768
import org.apache.doris.statistics.util.StatisticsUtil;
69+
import org.apache.doris.thrift.TFileFormatType;
6870
import org.apache.doris.thrift.THiveTable;
6971
import org.apache.doris.thrift.TTableDescriptor;
7072
import org.apache.doris.thrift.TTableType;
@@ -1188,6 +1190,44 @@ public List<SysTable> getSupportedSysTables() {
11881190
}
11891191
}
11901192

1193+
public TFileFormatType getFileFormatType(SessionVariable sessionVariable) throws UserException {
1194+
TFileFormatType type = null;
1195+
Table table = getRemoteTable();
1196+
String inputFormatName = table.getSd().getInputFormat();
1197+
String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
1198+
if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
1199+
type = TFileFormatType.FORMAT_PARQUET;
1200+
} else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
1201+
type = TFileFormatType.FORMAT_ORC;
1202+
} else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
1203+
String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
1204+
if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_JSON_SERDE)
1205+
|| serDeLib.equals(HiveMetaStoreClientHelper.LEGACY_HIVE_JSON_SERDE)) {
1206+
type = TFileFormatType.FORMAT_JSON;
1207+
} else if (serDeLib.equals(HiveMetaStoreClientHelper.OPENX_JSON_SERDE)) {
1208+
if (!sessionVariable.isReadHiveJsonInOneColumn()) {
1209+
type = TFileFormatType.FORMAT_JSON;
1210+
} else if (sessionVariable.isReadHiveJsonInOneColumn() && firstColumnIsString()) {
1211+
type = TFileFormatType.FORMAT_CSV_PLAIN;
1212+
} else {
1213+
throw new UserException("You set read_hive_json_in_one_column = true, but the first column of "
1214+
+ "table " + getName()
1215+
+ " is not a string column.");
1216+
}
1217+
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_TEXT_SERDE)) {
1218+
type = TFileFormatType.FORMAT_TEXT;
1219+
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_OPEN_CSV_SERDE)) {
1220+
type = TFileFormatType.FORMAT_CSV_PLAIN;
1221+
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE)) {
1222+
type = TFileFormatType.FORMAT_TEXT;
1223+
} else {
1224+
throw new UserException("Unsupported hive table serde: " + serDeLib);
1225+
}
1226+
}
1227+
return type;
1228+
}
1229+
1230+
11911231
private Table loadHiveTable() {
11921232
HMSCachedClient client = ((HMSExternalCatalog) catalog).getClient();
11931233
return client.getTable(getRemoteDbName(), remoteName);

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -409,44 +409,9 @@ public TableIf getTargetTable() {
409409

410410
@Override
411411
public TFileFormatType getFileFormatType() throws UserException {
412-
TFileFormatType type = null;
413-
Table table = hmsTable.getRemoteTable();
414-
String inputFormatName = table.getSd().getInputFormat();
415-
String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
416-
if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
417-
type = TFileFormatType.FORMAT_PARQUET;
418-
} else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.ORC.getDesc())) {
419-
type = TFileFormatType.FORMAT_ORC;
420-
} else if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.TEXT_FILE.getDesc())) {
421-
String serDeLib = table.getSd().getSerdeInfo().getSerializationLib();
422-
if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_JSON_SERDE)
423-
|| serDeLib.equals(HiveMetaStoreClientHelper.LEGACY_HIVE_JSON_SERDE)) {
424-
type = TFileFormatType.FORMAT_JSON;
425-
} else if (serDeLib.equals(HiveMetaStoreClientHelper.OPENX_JSON_SERDE)) {
426-
if (!sessionVariable.isReadHiveJsonInOneColumn()) {
427-
type = TFileFormatType.FORMAT_JSON;
428-
} else if (sessionVariable.isReadHiveJsonInOneColumn()
429-
&& hmsTable.firstColumnIsString()) {
430-
type = TFileFormatType.FORMAT_CSV_PLAIN;
431-
} else {
432-
throw new UserException("You set read_hive_json_in_one_column = true, but the first column of "
433-
+ "table " + hmsTable.getName()
434-
+ " is not a string column.");
435-
}
436-
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_TEXT_SERDE)) {
437-
type = TFileFormatType.FORMAT_TEXT;
438-
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_OPEN_CSV_SERDE)) {
439-
type = TFileFormatType.FORMAT_CSV_PLAIN;
440-
} else if (serDeLib.equals(HiveMetaStoreClientHelper.HIVE_MULTI_DELIMIT_SERDE)) {
441-
type = TFileFormatType.FORMAT_TEXT;
442-
} else {
443-
throw new UserException("Unsupported hive table serde: " + serDeLib);
444-
}
445-
}
446-
return type;
412+
return hmsTable.getFileFormatType(sessionVariable);
447413
}
448414

449-
450415
@Override
451416
protected void setScanParams(TFileRangeDesc rangeDesc, Split split) {
452417
if (split instanceof HiveSplit) {

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.apache.doris.nereids.trees.expressions.SlotReference;
3333
import org.apache.doris.nereids.trees.expressions.functions.Function;
3434
import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda;
35+
import org.apache.doris.nereids.trees.expressions.functions.table.TableValuedFunction;
3536
import org.apache.doris.nereids.trees.plans.Plan;
3637
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
3738
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
@@ -56,6 +57,7 @@
5657
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
5758
import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
5859
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
60+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumn;
5961
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
6062
import org.apache.doris.nereids.types.ArrayType;
6163
import org.apache.doris.nereids.types.DataType;
@@ -374,6 +376,10 @@ public Plan visitLogicalFilter(LogicalFilter<? extends Plan> filter, Void contex
374376

375377
@Override
376378
public Plan visitLogicalFileScan(LogicalFileScan fileScan, Void context) {
379+
if (!fileScan.supportPruneNestedColumn()) {
380+
return fileScan;
381+
}
382+
377383
Pair<Boolean, List<Slot>> replaced = replaceExpressions(fileScan.getOutput(), false, true);
378384
if (replaced.first) {
379385
List<Slot> replaceSlots = new ArrayList<>(replaced.second);
@@ -407,6 +413,11 @@ public Plan visitLogicalFileScan(LogicalFileScan fileScan, Void context) {
407413

408414
@Override
409415
public Plan visitLogicalTVFRelation(LogicalTVFRelation tvfRelation, Void context) {
416+
TableValuedFunction function = tvfRelation.getFunction();
417+
if (!(function instanceof SupportPruneNestedColumn)
418+
|| !((SupportPruneNestedColumn) function).supportPruneNestedColumn()) {
419+
return tvfRelation;
420+
}
410421
Pair<Boolean, List<Slot>> replaced
411422
= replaceExpressions(tvfRelation.getOutput(), false, true);
412423
if (replaced.first) {

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/File.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.doris.nereids.exceptions.AnalysisException;
2222
import org.apache.doris.nereids.trees.expressions.Properties;
2323
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
24+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumn;
25+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumnFormats;
2426
import org.apache.doris.nereids.types.coercion.AnyDataType;
2527
import org.apache.doris.tablefunction.FileTableValuedFunction;
2628
import org.apache.doris.tablefunction.TableValuedFunctionIf;
@@ -30,7 +32,7 @@
3032
/**
3133
* File Tvf
3234
*/
33-
public class File extends TableValuedFunction {
35+
public class File extends TableValuedFunction implements SupportPruneNestedColumn {
3436
public File(Properties properties) {
3537
super("file", properties);
3638
}
@@ -55,4 +57,8 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
5557
return visitor.visitFile(this, context);
5658
}
5759

60+
@Override
61+
public boolean supportPruneNestedColumn() {
62+
return SupportPruneNestedColumnFormats.supportFormat(getTVFProperties());
63+
}
5864
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Hdfs.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.doris.nereids.exceptions.AnalysisException;
2222
import org.apache.doris.nereids.trees.expressions.Properties;
2323
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
24+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumn;
25+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumnFormats;
2426
import org.apache.doris.nereids.types.coercion.AnyDataType;
2527
import org.apache.doris.tablefunction.HdfsTableValuedFunction;
2628
import org.apache.doris.tablefunction.TableValuedFunctionIf;
@@ -29,7 +31,7 @@
2931
import java.util.Map;
3032

3133
/** hdfs */
32-
public class Hdfs extends TableValuedFunction {
34+
public class Hdfs extends TableValuedFunction implements SupportPruneNestedColumn {
3335
public Hdfs(Properties properties) {
3436
super("hdfs", properties);
3537
}
@@ -53,4 +55,9 @@ protected TableValuedFunctionIf toCatalogFunction() {
5355
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
5456
return visitor.visitHdfs(this, context);
5557
}
58+
59+
@Override
60+
public boolean supportPruneNestedColumn() {
61+
return SupportPruneNestedColumnFormats.supportFormat(getTVFProperties());
62+
}
5663
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Local.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.doris.nereids.exceptions.AnalysisException;
2222
import org.apache.doris.nereids.trees.expressions.Properties;
2323
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
24+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumn;
25+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumnFormats;
2426
import org.apache.doris.nereids.types.coercion.AnyDataType;
2527
import org.apache.doris.tablefunction.LocalTableValuedFunction;
2628
import org.apache.doris.tablefunction.TableValuedFunctionIf;
@@ -30,7 +32,7 @@
3032
/**
3133
* local
3234
*/
33-
public class Local extends TableValuedFunction {
35+
public class Local extends TableValuedFunction implements SupportPruneNestedColumn {
3436
public Local(Properties properties) {
3537
super("local", properties);
3638
}
@@ -54,4 +56,9 @@ protected TableValuedFunctionIf toCatalogFunction() {
5456
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
5557
return visitor.visitLocal(this, context);
5658
}
59+
60+
@Override
61+
public boolean supportPruneNestedColumn() {
62+
return SupportPruneNestedColumnFormats.supportFormat(getTVFProperties());
63+
}
5764
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/S3.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@
2121
import org.apache.doris.nereids.exceptions.AnalysisException;
2222
import org.apache.doris.nereids.trees.expressions.Properties;
2323
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
24+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumn;
25+
import org.apache.doris.nereids.trees.plans.logical.SupportPruneNestedColumnFormats;
2426
import org.apache.doris.nereids.types.coercion.AnyDataType;
2527
import org.apache.doris.tablefunction.S3TableValuedFunction;
2628
import org.apache.doris.tablefunction.TableValuedFunctionIf;
2729

2830
import java.util.Map;
2931

3032
/** s3 */
31-
public class S3 extends TableValuedFunction {
33+
public class S3 extends TableValuedFunction implements SupportPruneNestedColumn {
3234
public S3(Properties properties) {
3335
super("s3", properties);
3436
}
@@ -52,4 +54,9 @@ protected TableValuedFunctionIf toCatalogFunction() {
5254
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
5355
return visitor.visitS3(this, context);
5456
}
57+
58+
@Override
59+
public boolean supportPruneNestedColumn() {
60+
return SupportPruneNestedColumnFormats.supportFormat(getTVFProperties());
61+
}
5562
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.doris.analysis.TableSnapshot;
2222
import org.apache.doris.catalog.PartitionItem;
2323
import org.apache.doris.datasource.ExternalTable;
24+
import org.apache.doris.datasource.hive.HMSExternalTable;
25+
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
2426
import org.apache.doris.datasource.mvcc.MvccUtil;
2527
import org.apache.doris.nereids.memo.GroupExpression;
2628
import org.apache.doris.nereids.properties.LogicalProperties;
@@ -32,6 +34,9 @@
3234
import org.apache.doris.nereids.trees.plans.RelationId;
3335
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
3436
import org.apache.doris.nereids.util.Utils;
37+
import org.apache.doris.qe.ConnectContext;
38+
import org.apache.doris.qe.SessionVariable;
39+
import org.apache.doris.thrift.TFileFormatType;
3540

3641
import com.google.common.base.Preconditions;
3742
import com.google.common.collect.ImmutableList;
@@ -46,7 +51,7 @@
4651
/**
4752
* Logical file scan for external catalog.
4853
*/
49-
public class LogicalFileScan extends LogicalCatalogRelation {
54+
public class LogicalFileScan extends LogicalCatalogRelation implements SupportPruneNestedColumn {
5055
protected final SelectedPartitions selectedPartitions;
5156
protected final Optional<TableSample> tableSample;
5257
protected final Optional<TableSnapshot> tableSnapshot;
@@ -189,6 +194,30 @@ public List<Slot> computeAsteriskOutput() {
189194
return super.computeAsteriskOutput();
190195
}
191196

197+
@Override
198+
public boolean supportPruneNestedColumn() {
199+
ExternalTable table = getTable();
200+
if (table instanceof IcebergExternalTable) {
201+
return true;
202+
} else if (table instanceof HMSExternalTable) {
203+
try {
204+
ConnectContext connectContext = ConnectContext.get();
205+
SessionVariable sessionVariable = connectContext.getSessionVariable();
206+
TFileFormatType fileFormatType = ((HMSExternalTable) table).getFileFormatType(sessionVariable);
207+
switch (fileFormatType) {
208+
case FORMAT_PARQUET:
209+
case FORMAT_ORC:
210+
return true;
211+
default:
212+
return false;
213+
}
214+
} catch (Throwable t) {
215+
// ignore and not prune
216+
}
217+
}
218+
return false;
219+
}
220+
192221
/**
193222
* SelectedPartitions contains the selected partitions and the total partition number.
194223
* Mainly for hive table partition pruning.

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
/**
7373
* Logical OlapScan.
7474
*/
75-
public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan {
75+
public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan, SupportPruneNestedColumn {
7676

7777
private static final Logger LOG = LogManager.getLogger(LogicalOlapScan.class);
7878

@@ -868,4 +868,9 @@ public LogicalOlapScan withPrunedTypeSlots(List<Slot> outputSlots) {
868868
manuallySpecifiedTabletIds, operativeSlots, virtualColumns, scoreOrderKeys, scoreLimit,
869869
annOrderKeys, annLimit, tableAlias);
870870
}
871+
872+
@Override
873+
public boolean supportPruneNestedColumn() {
874+
return true;
875+
}
871876
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.plans.logical;
19+
20+
/** SupportPruneNestedColumn */
21+
public interface SupportPruneNestedColumn {
22+
// return false will not prune the nested column
23+
boolean supportPruneNestedColumn();
24+
}

0 commit comments

Comments
 (0)