Skip to content

Commit 69763f6

Browse files
LantaoJinxinyual
authored andcommitted
Fix field not found issue in join output when column names are ambiguous (#3760)
* Fix field not found issue in join output when column names are ambiguous Signed-off-by: Lantao Jin <ltjin@amazon.com> * update join doc Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com> Signed-off-by: xinyual <xinyual@amazon.com>
1 parent 253a81d commit 69763f6

File tree

12 files changed

+548
-370
lines changed

12 files changed

+548
-370
lines changed

core/src/main/java/org/opensearch/sql/ast/tree/Join.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,49 @@
1919

2020
@ToString
2121
@Getter
22-
@RequiredArgsConstructor
2322
@EqualsAndHashCode(callSuper = false)
2423
public class Join extends UnresolvedPlan {
2524
private UnresolvedPlan left;
2625
private final UnresolvedPlan right;
27-
private final Optional<String> leftAlias;
28-
private final Optional<String> rightAlias;
26+
private Optional<String> leftAlias;
27+
private Optional<String> rightAlias;
2928
private final JoinType joinType;
3029
private final Optional<UnresolvedExpression> joinCondition;
3130
private final JoinHint joinHint;
3231

32+
public Join(
33+
UnresolvedPlan right,
34+
Optional<String> leftAlias,
35+
Optional<String> rightAlias,
36+
JoinType joinType,
37+
Optional<UnresolvedExpression> joinCondition,
38+
JoinHint joinHint) {
39+
this.right = right;
40+
this.leftAlias = leftAlias;
41+
this.rightAlias = rightAlias;
42+
this.joinType = joinType;
43+
this.joinCondition = joinCondition;
44+
this.joinHint = joinHint;
45+
}
46+
3347
@Override
3448
public UnresolvedPlan attach(UnresolvedPlan child) {
35-
this.left = leftAlias.isEmpty() ? child : new SubqueryAlias(leftAlias.get(), child);
49+
// attach child to left, meanwhile fill back side aliases if possible.
50+
if (leftAlias.isPresent()) {
51+
if (child instanceof SubqueryAlias alias) {
52+
this.left = new SubqueryAlias(leftAlias.get(), alias.getChild().getFirst());
53+
} else {
54+
this.left = new SubqueryAlias(leftAlias.get(), child);
55+
}
56+
} else {
57+
if (child instanceof SubqueryAlias alias) {
58+
leftAlias = Optional.of(alias.getAlias());
59+
}
60+
this.left = child;
61+
}
62+
if (rightAlias.isEmpty() && this.right instanceof SubqueryAlias alias) {
63+
rightAlias = Optional.of(alias.getAlias());
64+
}
3665
return this;
3766
}
3867

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package org.opensearch.sql.calcite;
77

88
import static org.apache.calcite.sql.SqlKind.AS;
9+
import static org.opensearch.sql.ast.tree.Join.JoinType.ANTI;
10+
import static org.opensearch.sql.ast.tree.Join.JoinType.SEMI;
911
import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST;
1012
import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST;
1113
import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC;
@@ -650,8 +652,42 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) {
650652
node.getJoinCondition()
651653
.map(c -> rexVisitor.analyzeJoinCondition(c, context))
652654
.orElse(context.relBuilder.literal(true));
653-
context.relBuilder.join(
654-
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
655+
if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) {
656+
// semi and anti join only return left table outputs
657+
context.relBuilder.join(
658+
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
659+
} else {
660+
// Join condition could contain duplicated column name, Calcite will rename the duplicated
661+
// column name with numeric suffix, e.g. ON t1.id = t2.id, the output contains `id` and `id0`
662+
// when a new project add to stack. To avoid `id0`, we will rename the `id0` to `alias.id`
663+
// or `tableIdentifier.id`:
664+
List<String> leftColumns = context.relBuilder.peek(1).getRowType().getFieldNames();
665+
List<String> rightColumns = context.relBuilder.peek().getRowType().getFieldNames();
666+
List<String> rightTableName =
667+
PlanUtils.findTable(context.relBuilder.peek()).getQualifiedName();
668+
// Using `table.column` instead of `catalog.database.table.column` as column prefix because
669+
// the schema for OpenSearch index is always `OpenSearch`. But if we reuse this logic in other
670+
// query engines, the column can only be searched in current schema namespace. For example,
671+
// If the plan convert to Spark plan, and there are two table1: database1.table1 and
672+
// database2.table1. The query with column `table1.id` can only be resolved in the namespace
673+
// of "database1". User should run `using database1` before the query which access `table1.id`
674+
String rightTableQualifiedName = rightTableName.getLast();
675+
// new columns with alias or table;
676+
List<String> rightColumnsWithAliasIfConflict =
677+
rightColumns.stream()
678+
.map(
679+
col ->
680+
leftColumns.contains(col)
681+
? node.getRightAlias()
682+
.map(a -> a + "." + col)
683+
.orElse(rightTableQualifiedName + "." + col)
684+
: col)
685+
.toList();
686+
context.relBuilder.join(
687+
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
688+
JoinAndLookupUtils.renameToExpectedFields(
689+
rightColumnsWithAliasIfConflict, leftColumns.size(), context);
690+
}
655691
return context.relBuilder.peek();
656692
}
657693

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import static java.util.Objects.requireNonNull;
99
import static org.apache.calcite.sql.SqlKind.AS;
10+
import static org.apache.commons.lang3.StringUtils.substringAfterLast;
1011
import static org.opensearch.sql.ast.expression.SpanUnit.NONE;
1112
import static org.opensearch.sql.ast.expression.SpanUnit.UNKNOWN;
1213
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
@@ -281,7 +282,24 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context
281282
}
282283
} else if (parts.size() == 2) {
283284
// 1.2 Handle the case of `t1.id = t2.id` or `alias1.id = alias2.id`
284-
return context.relBuilder.field(2, parts.get(0), parts.get(1));
285+
try {
286+
return context.relBuilder.field(2, parts.get(0), parts.get(1));
287+
} catch (IllegalArgumentException e) {
288+
// Similar to the step 2.3.
289+
List<String> candidates =
290+
context.relBuilder.peek(1).getRowType().getFieldNames().stream()
291+
.filter(col -> substringAfterLast(col, ".").equals(parts.getLast()))
292+
.toList();
293+
for (String candidate : candidates) {
294+
try {
295+
// field("nation2", "n2.n_name"); // pass
296+
return context.relBuilder.field(2, parts.get(0), candidate);
297+
} catch (IllegalArgumentException e1) {
298+
// field("nation2", "n_name"); // do nothing when fail (n_name is field of nation1)
299+
}
300+
}
301+
throw new UnsupportedOperationException("Unsupported qualified name: " + node);
302+
}
285303
} else if (parts.size() == 3) {
286304
throw new UnsupportedOperationException("Unsupported qualified name: " + node);
287305
}
@@ -296,21 +314,43 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context
296314
List<String> currentFields = context.relBuilder.peek().getRowType().getFieldNames();
297315
if (currentFields.contains(qualifiedName)) {
298316
// 2.1 resolve QualifiedName from stack top
317+
// Note: QualifiedName with multiple parts also could be applied in step 2.1,
318+
// for example `n2.n_name` or `nation2.n_name` in the output of join can be resolved here.
299319
return context.relBuilder.field(qualifiedName);
300320
} else if (node.getParts().size() == 2) {
301321
// 2.2 resolve QualifiedName with an alias or table name
302322
List<String> parts = node.getParts();
303323
try {
304324
return context.relBuilder.field(1, parts.get(0), parts.get(1));
305325
} catch (IllegalArgumentException e) {
306-
// 2.3 resolve QualifiedName with outer alias
326+
// 2.3 For field which renamed with <alias.field>, to resolve the field with table
327+
// identifier
328+
// `nation2.n_name`,
329+
// we convert it to resolve <table.alias.field>, e.g. `nation2.n2.n_name`
330+
// `n2.n_name` was the renamed field name from the duplicated field `(nation2.)n_name0` of
331+
// join output.
332+
// Build the candidates which contains `n_name`: e.g. `(nation1.)n_name`, `n2.n_name`
333+
List<String> candidates =
334+
context.relBuilder.peek().getRowType().getFieldNames().stream()
335+
.filter(col -> substringAfterLast(col, ".").equals(parts.getLast()))
336+
.toList();
337+
for (String candidate : candidates) {
338+
try {
339+
// field("nation2", "n2.n_name"); // pass
340+
return context.relBuilder.field(parts.get(0), candidate);
341+
} catch (IllegalArgumentException e1) {
342+
// field("nation2", "n_name"); // do nothing when fail (n_name is field of nation1)
343+
}
344+
}
345+
// 2.4 resolve QualifiedName with outer alias
346+
// check existing of parts.get(0)
307347
return context
308348
.peekCorrelVar()
309349
.map(correlVar -> context.relBuilder.field(correlVar, parts.get(1)))
310350
.orElseThrow(() -> e); // Re-throw the exception if no correlated variable exists
311351
}
312352
} else if (currentFields.stream().noneMatch(f -> f.startsWith(qualifiedName))) {
313-
// 2.4 try resolving combination of 2.1 and 2.3 to resolve rest cases
353+
// 2.5 try resolving combination of 2.1 and 2.4 to resolve rest cases
314354
return context
315355
.peekCorrelVar()
316356
.map(correlVar -> context.relBuilder.field(correlVar, qualifiedName))

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,19 @@
1616
import java.util.List;
1717
import java.util.stream.Collectors;
1818
import javax.annotation.Nullable;
19+
import org.apache.calcite.plan.RelOptTable;
20+
import org.apache.calcite.rel.RelHomogeneousShuttle;
21+
import org.apache.calcite.rel.RelNode;
22+
import org.apache.calcite.rel.RelShuttle;
23+
import org.apache.calcite.rel.core.TableScan;
1924
import org.apache.calcite.rex.RexInputRef;
2025
import org.apache.calcite.rex.RexNode;
2126
import org.apache.calcite.rex.RexVisitorImpl;
2227
import org.apache.calcite.rex.RexWindowBound;
2328
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2429
import org.apache.calcite.sql.type.SqlTypeName;
2530
import org.apache.calcite.tools.RelBuilder;
31+
import org.apache.calcite.util.Util;
2632
import org.opensearch.sql.ast.AbstractNodeVisitor;
2733
import org.opensearch.sql.ast.Node;
2834
import org.opensearch.sql.ast.expression.IntervalUnit;
@@ -311,6 +317,25 @@ public Relation visitRelation(Relation node, Object context) {
311317
return node.getChild().get(0).accept(relationVisitor, null);
312318
}
313319

320+
/** Similar to {@link org.apache.calcite.plan.RelOptUtil#findTable(RelNode, String) } */
321+
static RelOptTable findTable(RelNode root) {
322+
try {
323+
RelShuttle visitor =
324+
new RelHomogeneousShuttle() {
325+
@Override
326+
public RelNode visit(TableScan scan) {
327+
final RelOptTable scanTable = scan.getTable();
328+
throw new Util.FoundOne(scanTable);
329+
}
330+
};
331+
root.accept(visitor);
332+
return null;
333+
} catch (Util.FoundOne e) {
334+
Util.swallow(e, null);
335+
return (RelOptTable) e.getNode();
336+
}
337+
}
338+
314339
/**
315340
* Transform plan to attach specified child to the first leaf node.
316341
*

0 commit comments

Comments
 (0)