Skip to content

Commit 3752952

Browse files
committed
Add test and fix for dolthub/dolt issue #9797 (EXISTS subquery returning duplicate rows)
1 parent f485924 commit 3752952

File tree

4 files changed

+42
-5
lines changed

4 files changed

+42
-5
lines changed

enginetest/queries/script_queries.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11780,6 +11780,26 @@ select * from t1 except (
1178011780
},
1178111781
},
1178211782
},
11783+
{
11784+
// https://github.com/dolthub/dolt/issues/9797
11785+
Name: "EXISTS subquery duplicate rows issue",
11786+
SetUpScript: []string{
11787+
"CREATE TABLE t(c0 INT, c1 INT, PRIMARY KEY(c0, c1));",
11788+
"INSERT INTO t VALUES (1, 1);",
11789+
"INSERT INTO t VALUES (2, 2);",
11790+
"INSERT INTO t VALUES (2, 3);",
11791+
},
11792+
Assertions: []ScriptTestAssertion{
11793+
{
11794+
Query: "SELECT * FROM t WHERE EXISTS (SELECT 1 FROM t AS x WHERE x.c0 = t.c0);",
11795+
Expected: []sql.Row{
11796+
{1, 1},
11797+
{2, 2},
11798+
{2, 3},
11799+
},
11800+
},
11801+
},
11802+
},
1178311803
}
1178411804

1178511805
var SpatialScriptTests = []ScriptTest{

sql/analyzer/indexed_joins.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,12 +467,11 @@ func convertSemiToInnerJoin(m *memo.Memo) error {
467467

468468
// project is a new group
469469
rightGrp := m.MemoizeProject(nil, semi.Right, projectExpressions)
470-
if _, ok := semi.Right.First.(*memo.Distinct); !ok {
471-
rightGrp.RelProps.Distinct = memo.HashDistinctOp
472-
}
473470

474471
// join and its commute are a new group
475472
joinGrp := m.MemoizeInnerJoin(nil, semi.Left, rightGrp, plan.JoinTypeInner, semi.Filter)
473+
// Apply a hash DISTINCT to the join group so that converting the SemiJoin to an InnerJoin cannot emit duplicate rows.
474+
joinGrp.RelProps.Distinct = memo.HashDistinctOp
476475
// TODO: can't commute if right SubqueryAlias references outside scope (OuterScopeVisibility/IsLateral)
477476
m.MemoizeInnerJoin(joinGrp, rightGrp, semi.Left, plan.JoinTypeInner, semi.Filter)
478477

sql/analyzer/unnest_exists_subqueries.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,12 @@ func unnestExistSubqueries(ctx *sql.Context, scope *plan.Scope, a *Analyzer, fil
193193
ret = plan.NewAntiJoinIncludingNulls(ret, s.inner, cond).WithComment(comment)
194194
qFlags.Set(sql.QFlagInnerJoin)
195195
case plan.JoinTypeSemi:
196-
ret = plan.NewCrossJoin(ret, s.inner).WithComment(comment)
197-
qFlags.Set(sql.QFlagCrossJoin)
196+
// Use a SemiJoin with a TRUE condition instead of a CrossJoin to preserve EXISTS semantics.
197+
// A CrossJoin would emit each outer row once per inner row, but EXISTS must emit
198+
// each outer row exactly once when at least one inner row exists.
199+
cond := expression.NewLiteral(true, types.Boolean)
200+
ret = plan.NewSemiJoin(ret, s.inner, cond).WithComment(comment)
201+
qFlags.Set(sql.QFlagInnerJoin)
198202
default:
199203
return filter, transform.SameTree, fmt.Errorf("hoistSelectExists failed on unexpected join type")
200204
}

sql/rowexec/join_iters.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,20 @@ func (i *joinIter) Next(ctx *sql.Context) (sql.Row, error) {
185185
}
186186

187187
i.foundMatch = true
188+
189+
// For semi joins, close secondary iterator and move to next primary row after first match
190+
// This ensures each primary row is emitted at most once (semi join semantics)
191+
if i.joinType.IsSemi() {
192+
err = i.secondary.Close(ctx)
193+
i.secondary = nil
194+
if err != nil {
195+
return nil, err
196+
}
197+
i.loadPrimaryRow = true
198+
// For semi joins, return only the primary row, not the combined row
199+
return i.removeParentRow(i.buildRow(primary, nil)), nil
200+
}
201+
188202
return i.removeParentRow(row), nil
189203
}
190204
}

0 commit comments

Comments
 (0)