Skip to content

Commit c28afa3

Browse files
authored
[opt](complex type) support prune nested column through lateral view (#58776)
Support pruning nested columns through lateral view for the following generator functions: explode, explode_outer, explode_map, explode_map_outer, posexplode, and posexplode_outer. Related to #57204.
1 parent 680bbea commit c28afa3

24 files changed

+378
-67
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,14 @@ public TAccessPathType getType() {
449449
public void setType(TAccessPathType type) {
450450
this.type = type;
451451
}
452+
453+
public AccessPathBuilder getAccessPathBuilder() {
454+
return accessPathBuilder;
455+
}
452456
}
453457

454-
private static class AccessPathBuilder {
458+
/** AccessPathBuilder */
459+
public static class AccessPathBuilder {
455460
private LinkedList<String> accessPath;
456461

457462
public AccessPathBuilder() {
@@ -463,6 +468,16 @@ public AccessPathBuilder addPrefix(String prefix) {
463468
return this;
464469
}
465470

471+
public AccessPathBuilder addSuffix(String suffix) {
472+
accessPath.addLast(suffix);
473+
return this;
474+
}
475+
476+
public AccessPathBuilder addSuffix(List<String> suffix) {
477+
accessPath.addAll(suffix);
478+
return this;
479+
}
480+
466481
public AccessPathBuilder removePrefix() {
467482
accessPath.removeFirst();
468483
return this;

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,29 @@
1717

1818
package org.apache.doris.nereids.rules.rewrite;
1919

20+
import org.apache.doris.analysis.AccessPathInfo;
2021
import org.apache.doris.nereids.StatementContext;
2122
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectAccessPathResult;
23+
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectorContext;
2224
import org.apache.doris.nereids.trees.expressions.Alias;
2325
import org.apache.doris.nereids.trees.expressions.Expression;
2426
import org.apache.doris.nereids.trees.expressions.NamedExpression;
2527
import org.apache.doris.nereids.trees.expressions.Slot;
28+
import org.apache.doris.nereids.trees.expressions.functions.Function;
29+
import org.apache.doris.nereids.trees.expressions.functions.generator.Explode;
30+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMap;
31+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMapOuter;
32+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeOuter;
33+
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
34+
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
35+
import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
2636
import org.apache.doris.nereids.trees.plans.Plan;
2737
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor;
2838
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
2939
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer;
3040
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan;
3141
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
42+
import org.apache.doris.nereids.trees.plans.logical.LogicalGenerate;
3243
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
3344
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
3445
import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation;
@@ -45,6 +56,8 @@
4556
import java.util.List;
4657
import java.util.Map;
4758
import java.util.Map.Entry;
59+
import java.util.Set;
60+
import java.util.TreeSet;
4861

4962
/** AccessPathPlanCollector */
5063
public class AccessPathPlanCollector extends DefaultPlanVisitor<Void, StatementContext> {
@@ -56,6 +69,145 @@ public Map<Slot, List<CollectAccessPathResult>> collect(Plan root, StatementCont
5669
return scanSlotToAccessPaths;
5770
}
5871

72+
@Override
73+
public Void visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, StatementContext context) {
74+
List<Function> generators = generate.getGenerators();
75+
List<Slot> output = generate.getGeneratorOutput();
76+
77+
AccessPathExpressionCollector exprCollector
78+
= new AccessPathExpressionCollector(context, allSlotToAccessPaths, false);
79+
for (int i = 0; i < output.size(); i++) {
80+
Slot generatorOutput = output.get(i);
81+
Function function = generators.get(i);
82+
Collection<CollectAccessPathResult> accessPaths = allSlotToAccessPaths.get(
83+
generatorOutput.getExprId().asInt());
84+
if (function instanceof Explode || function instanceof ExplodeOuter) {
85+
if (accessPaths.isEmpty()) {
86+
// use the whole column
87+
for (Expression child : function.children()) {
88+
exprCollector.collect(child);
89+
}
90+
} else {
91+
for (CollectAccessPathResult accessPath : accessPaths) {
92+
List<String> path = accessPath.getPath();
93+
if (function.arity() == 1) {
94+
// $c$1.VALUES.b
95+
CollectorContext argumentContext = new CollectorContext(context, false);
96+
argumentContext.setType(accessPath.getType());
97+
argumentContext.getAccessPathBuilder()
98+
.addSuffix(AccessPathInfo.ACCESS_ALL)
99+
.addSuffix(path.subList(1, path.size()));
100+
function.child(0).accept(exprCollector, argumentContext);
101+
continue;
102+
} else if (path.size() >= 2) {
103+
// for a path like $c$1.col1.VALUES.b, extract the struct field name 'col1'
104+
String colName = path.get(1);
105+
// extract '1' in 'col1'
106+
int colIndex = Integer.parseInt(colName.substring(StructLiteral.COL_PREFIX.length())) - 1;
107+
CollectorContext argumentContext = new CollectorContext(context, false);
108+
argumentContext.setType(accessPath.getType());
109+
argumentContext.getAccessPathBuilder()
110+
.addSuffix(AccessPathInfo.ACCESS_ALL)
111+
.addSuffix(path.subList(2, path.size()));
112+
function.child(colIndex).accept(exprCollector, argumentContext);
113+
continue;
114+
}
115+
// use the whole column
116+
for (Expression child : function.children()) {
117+
exprCollector.collect(child);
118+
}
119+
}
120+
}
121+
} else if (function instanceof ExplodeMap || function instanceof ExplodeMapOuter) {
122+
if (accessPaths.isEmpty()) {
123+
// use the whole column
124+
for (Expression child : function.children()) {
125+
exprCollector.collect(child);
126+
}
127+
} else {
128+
for (CollectAccessPathResult accessPath : accessPaths) {
129+
List<String> path = accessPath.getPath();
130+
if (path.size() >= 2) {
131+
if (path.get(1).equalsIgnoreCase(StructLiteral.COL_PREFIX + "1")) {
132+
// key
133+
for (Expression child : function.children()) {
134+
CollectorContext argumentContext = new CollectorContext(context, false);
135+
argumentContext.setType(accessPath.getType());
136+
argumentContext.getAccessPathBuilder()
137+
.addSuffix(AccessPathInfo.ACCESS_MAP_KEYS)
138+
.addSuffix(path.subList(2, path.size()));
139+
child.accept(exprCollector, argumentContext);
140+
}
141+
continue;
142+
} else if (path.get(1).equalsIgnoreCase(StructLiteral.COL_PREFIX + "2")) {
143+
// value
144+
for (Expression child : function.children()) {
145+
CollectorContext argumentContext = new CollectorContext(context, false);
146+
argumentContext.setType(accessPath.getType());
147+
argumentContext.getAccessPathBuilder()
148+
.addSuffix(AccessPathInfo.ACCESS_MAP_VALUES)
149+
.addSuffix(path.subList(2, path.size()));
150+
child.accept(exprCollector, argumentContext);
151+
}
152+
continue;
153+
}
154+
}
155+
// use the whole column
156+
exprCollector.collect(function.child(0));
157+
}
158+
}
159+
} else if (function instanceof PosExplode || function instanceof PosExplodeOuter) {
160+
if (accessPaths.isEmpty()) {
161+
// use the whole column
162+
for (Expression child : function.children()) {
163+
exprCollector.collect(child);
164+
}
165+
} else {
166+
boolean useWholeItem = false;
167+
Set<Integer> prunedChildIndex = new TreeSet<>();
168+
for (CollectAccessPathResult accessPath : accessPaths) {
169+
List<String> path = accessPath.getPath();
170+
if (path.size() >= 2) {
171+
// for a path like $c$1.col1.VALUES.b, extract the struct field name 'col1'
172+
String colName = path.get(1);
173+
if (colName.startsWith(StructLiteral.COL_PREFIX)) {
174+
// for a path like $c$1.col1.VALUES.b, the field name is 'col1'
175+
// extract '1' in 'col1'
176+
int colIndex
177+
= Integer.parseInt(colName.substring(StructLiteral.COL_PREFIX.length())) - 1;
178+
CollectorContext argumentContext = new CollectorContext(context, false);
179+
argumentContext.setType(accessPath.getType());
180+
argumentContext.getAccessPathBuilder()
181+
.addSuffix(AccessPathInfo.ACCESS_ALL)
182+
.addSuffix(path.subList(2, path.size()));
183+
function.child(colIndex).accept(exprCollector, argumentContext);
184+
prunedChildIndex.add(colIndex);
185+
}
186+
} else {
187+
useWholeItem = true;
188+
break;
189+
}
190+
}
191+
if (useWholeItem) {
192+
// use the whole column
193+
for (Expression child : function.children()) {
194+
exprCollector.collect(child);
195+
}
196+
} else {
197+
for (int j = 0; j < function.arity(); j++) {
198+
if (!prunedChildIndex.contains(j)) {
199+
exprCollector.collect(function.child(j));
200+
}
201+
}
202+
}
203+
}
204+
} else {
205+
exprCollector.collect(function);
206+
}
207+
}
208+
return generate.child().accept(this, context);
209+
}
210+
59211
@Override
60212
public Void visitLogicalProject(LogicalProject<? extends Plan> project, StatementContext context) {
61213
AccessPathExpressionCollector exprCollector

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,12 @@ public Plan visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, Void
325325

326326
Pair<Boolean, List<Function>> replacedGenerators
327327
= replaceExpressions(generate.getGenerators(), false, false);
328+
for (int i = 0; i < replacedGenerators.second.size(); i++) {
329+
DataType dataType = replacedGenerators.second.get(i).getDataType();
330+
replacedDataTypes.put(generate.getGeneratorOutput().get(i).getExprId().asInt(),
331+
new AccessPathInfo(dataType, null, null)
332+
);
333+
}
328334
Pair<Boolean, List<Slot>> replacedGeneratorOutput
329335
= replaceExpressions(generate.getGeneratorOutput(), false, false);
330336
if (replacedGenerators.first || replacedGeneratorOutput.first) {

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@
2323
import org.apache.doris.nereids.trees.expressions.functions.ComputePrecision;
2424
import org.apache.doris.nereids.trees.expressions.functions.CustomSignature;
2525
import org.apache.doris.nereids.trees.expressions.functions.SearchSignature;
26+
import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
2627
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
2728
import org.apache.doris.nereids.types.ArrayType;
2829
import org.apache.doris.nereids.types.DataType;
2930
import org.apache.doris.nereids.types.NullType;
3031
import org.apache.doris.nereids.types.StructField;
3132
import org.apache.doris.nereids.types.StructType;
33+
import org.apache.doris.nereids.util.ExpressionUtils;
3234

3335
import com.google.common.base.Preconditions;
3436
import com.google.common.collect.ImmutableList;
@@ -42,12 +44,11 @@
4244
* where the first column contains 1, 2, 3, and the second column contains 4, 5, 6.
4345
*/
4446
public class Explode extends TableGeneratingFunction implements CustomSignature, ComputePrecision, AlwaysNullable {
45-
4647
/**
4748
* constructor with one or more arguments.
4849
*/
49-
public Explode(Expression[] args) {
50-
super("explode", args);
50+
public Explode(Expression arg, Expression... others) {
51+
super("explode", ExpressionUtils.mergeArguments(arg, others));
5152
}
5253

5354
/** constructor for withChildren and reuse signature */
@@ -77,10 +78,10 @@ public FunctionSignature customSignature() {
7778
if (children.get(i).getDataType().isNullType()) {
7879
arguments.add(ArrayType.of(NullType.INSTANCE));
7980
structFields.add(
80-
new StructField("col" + (i + 1), NullType.INSTANCE, true, ""));
81+
new StructField(StructLiteral.COL_PREFIX + (i + 1), NullType.INSTANCE, true, ""));
8182
} else if (children.get(i).getDataType().isArrayType()) {
8283
structFields.add(
83-
new StructField("col" + (i + 1),
84+
new StructField(StructLiteral.COL_PREFIX + (i + 1),
8485
((ArrayType) (children.get(i)).getDataType()).getItemType(), true, ""));
8586
arguments.add(children.get(i).getDataType());
8687
} else {

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7676

7777
@Override
7878
public Expression rewriteWhenAnalyze() {
79-
Expression[] args = {new Cast(children.get(0), ArrayType.of(DoubleType.INSTANCE))};
80-
return new Explode(args);
79+
return new Explode(new Cast(children.get(0), ArrayType.of(DoubleType.INSTANCE)));
8180
}
8281
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7676

7777
@Override
7878
public Expression rewriteWhenAnalyze() {
79-
Expression[] args = {new Cast(children.get(0), ArrayType.of(DoubleType.INSTANCE))};
80-
return new ExplodeOuter(args);
79+
return new ExplodeOuter(new Cast(children.get(0), ArrayType.of(DoubleType.INSTANCE)));
8180
}
8281
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7676

7777
@Override
7878
public Expression rewriteWhenAnalyze() {
79-
Expression[] args = {new Cast(children.get(0), ArrayType.of(BigIntType.INSTANCE))};
80-
return new Explode(args);
79+
return new Explode(new Cast(children.get(0), ArrayType.of(BigIntType.INSTANCE)));
8180
}
8281
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7676

7777
@Override
7878
public Expression rewriteWhenAnalyze() {
79-
Expression[] args = {new Cast(children.get(0), ArrayType.of(BigIntType.INSTANCE))};
80-
return new ExplodeOuter(args);
79+
return new ExplodeOuter(new Cast(children.get(0), ArrayType.of(BigIntType.INSTANCE)));
8180
}
8281
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7575

7676
@Override
7777
public Expression rewriteWhenAnalyze() {
78-
Expression[] args = {new Cast(children.get(0), ArrayType.of(JsonType.INSTANCE))};
79-
return new Explode(args);
78+
return new Explode(new Cast(children.get(0), ArrayType.of(JsonType.INSTANCE)));
8079
}
8180
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
7575

7676
@Override
7777
public Expression rewriteWhenAnalyze() {
78-
Expression[] args = {new Cast(children.get(0), ArrayType.of(JsonType.INSTANCE))};
79-
return new ExplodeOuter(args);
78+
return new ExplodeOuter(new Cast(children.get(0), ArrayType.of(JsonType.INSTANCE)));
8079
}
8180
}

0 commit comments

Comments
 (0)