Skip to content

Commit 36a4308

Browse files
authored
planner: support left outer join into anti semi join (#64959)
close #64329
1 parent 6dd3a58 commit 36a4308

File tree

12 files changed

+1838
-9
lines changed

12 files changed

+1838
-9
lines changed

pkg/planner/core/casetest/rule/BUILD.bazel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ go_test(
1212
"rule_inject_extra_projection_test.go",
1313
"rule_join_reorder_test.go",
1414
"rule_outer2inner_test.go",
15+
"rule_outer_to_semi_join_test.go",
1516
"rule_predicate_pushdown_test.go",
1617
"rule_predicate_simplification_test.go",
1718
],
1819
data = glob(["testdata/**"]),
1920
flaky = True,
20-
shard_count = 14,
21+
shard_count = 15,
2122
deps = [
2223
"//pkg/config",
2324
"//pkg/domain",

pkg/planner/core/casetest/rule/main_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ func TestMain(m *testing.M) {
3434
testDataMap.LoadTestSuiteData("testdata", "join_reorder_suite", true)
3535
testDataMap.LoadTestSuiteData("testdata", "predicate_pushdown_suite", true)
3636
testDataMap.LoadTestSuiteData("testdata", "predicate_simplification", true)
37+
testDataMap.LoadTestSuiteData("testdata", "outer_to_semi_join_suite", true)
38+
3739
opts := []goleak.Option{
3840
goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"),
3941
goleak.IgnoreTopFunction("github.com/bazelbuild/rules_go/go/tools/bzltestutil.RegisterTimeoutHandler.func1"),
@@ -71,3 +73,7 @@ func GetPredicatePushdownSuiteData() testdata.TestData {
7173
func GetPredicateSimplificationSuiteData() testdata.TestData {
7274
return testDataMap["predicate_simplification"]
7375
}
76+
77+
// GetOuterToSemiJoinSuiteData returns the test data stored in testDataMap
// under the "outer_to_semi_join_suite" key.
func GetOuterToSemiJoinSuiteData() testdata.TestData {
78+
return testDataMap["outer_to_semi_join_suite"]
79+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2025 PingCAP, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package rule
16+
17+
import (
18+
"testing"
19+
20+
"github.com/pingcap/tidb/pkg/testkit"
21+
"github.com/pingcap/tidb/pkg/testkit/testdata"
22+
)
23+
24+
// TestOuterToSemiJoin creates tables A and B, fills them with rows that include
// NULLs, and then, for every SQL statement in the outer-to-semi-join suite,
// checks both the EXPLAIN FORMAT='plan_tree' output and the query result
// against the expectations stored in the suite's output data.
func TestOuterToSemiJoin(tt *testing.T) {
25+
testkit.RunTestUnderCascades(tt, func(t *testing.T, tk *testkit.TestKit, cascades, caller string) {
26+
tk.MustExec("use test")
27+
tk.MustExec("drop table if exists A, B")
28+
// A.id is not a primary key to allow NULL values for the test.
29+
tk.MustExec("CREATE TABLE A (id INT, val INT, nullable_val INT)")
30+
tk.MustExec("CREATE TABLE B (id INT PRIMARY KEY, a_id INT, val INT, non_null_col INT NOT NULL, nullable_col INT)")
31+
32+
// Insert data into A
33+
// A.val=10, 20, NULL, 40, 50 (the val=50 row has a NULL id).
34+
tk.MustExec("INSERT INTO A VALUES (1, 10, 100), (2, 20, NULL), (3, NULL, 300), (4, 40, 400), (NULL, 50, 500)")
35+
36+
// Insert data into B
37+
// B.val=10 matches A.val=10.
38+
// B.val=NULL matches A.val=NULL via <=>.
39+
// B.val=500 and 600 have no match in A.val.
40+
tk.MustExec("INSERT INTO B VALUES (101, 1, 10, 1, 1), (102, 2, NULL, 2, NULL), (103, 5, 500, 5, 5), (104, NULL, 600, 6, 6)")
41+
42+
var input []string
43+
var output []struct {
44+
SQL string
45+
Plan []string
46+
Result []string
47+
}
48+
suite := GetOuterToSemiJoinSuiteData()
49+
suite.LoadTestCases(t, &input, &output, cascades, caller)
50+
for i, sql := range input {
51+
// NOTE(review): testdata.OnRecord presumably runs this closure only when
// expectations are being re-recorded — confirm against the testdata package.
testdata.OnRecord(func() {
52+
output[i].SQL = sql
53+
output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery("EXPLAIN FORMAT='plan_tree' " + sql).Rows())
54+
output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Rows())
55+
})
56+
// Verify the plan first, then the actual rows returned by the query.
tk.MustQuery("EXPLAIN FORMAT='plan_tree' " + sql).Check(testkit.Rows(output[i].Plan...))
57+
tk.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
58+
}
59+
})
60+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
[
2+
{
3+
"name": "TestOuterToSemiJoin",
4+
"cases": [
5+
// GROUP 1: LEFT JOIN to SEMI JOIN (Positive cases, equi-join on id)
6+
"select A.* from A left join B on A.id = B.a_id where B.val > 0",
7+
"select A.* from A left join B on A.id = B.a_id is null where B.val > 0",
8+
"select A.* from A left join B on A.id <=> B.a_id where B.val > 0",
9+
"select A.* from A left join B on A.id = B.a_id and B.val > 100 where B.id is not null",
10+
11+
// GROUP 2: LEFT JOIN to ANTI SEMI JOIN (Positive cases, equi-join on id)
12+
"select A.* from A left join B on A.id = B.a_id where B.id IS NULL order by A.id",
13+
"select A.* from A left join B on A.id = B.a_id where B.non_null_col IS NULL",
14+
"select A.* from A left join B on A.id = B.a_id is null where B.non_null_col IS NULL",
15+
"select A.* from A left join B on A.id <=> B.a_id where B.id IS NULL",
16+
"select A.* from A left join B on A.id = B.a_id where B.id is null and A.val > 30 order by A.id",
17+
18+
// GROUP 3: LEFT JOIN (Negative cases, should NOT be converted, equi-join on id)
19+
"select A.* from A left join B on A.id = B.a_id where B.nullable_col IS NULL",
20+
"select A.* from A left join B on A.id = B.a_id where B.id IS NULL OR B.val > 100",
21+
"select A.* from A left join B on A.id = B.a_id and B.val > 500 where B.id is null",
22+
23+
// GROUP 4: RIGHT JOIN to SEMI/ANTI SEMI JOIN (Positive cases, equi-join)
24+
"select B.* from A right join B on A.id = B.a_id where A.val > 10",
25+
"select B.* from A right join B on A.id = B.a_id where A.id IS NULL",
26+
"select B.* from A right join B on A.id = B.a_id is null where A.id IS NULL",
27+
"select B.* from A right join B on A.id <=> B.a_id where A.id IS NULL",
28+
"select B.* from A right join B on A.id = B.a_id where A.id is null and B.val > 100 order by B.id",
29+
"select B.* from A right join B on A.val <=> B.val where A.id IS NULL",
30+
"select B.* from A right join B on A.val <=> B.val where A.id IS NOT NULL",
31+
"select B.* from A right join B on A.id = B.a_id and B.val > 100 where A.id is null",
32+
33+
// GROUP 5: RIGHT JOIN (Negative cases, should NOT be converted, equi-join)
34+
"select B.* from A right join B on A.id = B.a_id where A.nullable_val IS NULL",
35+
"select B.* from A right join B on A.id = B.a_id where A.id IS NULL OR A.val > 20",
36+
37+
// GROUP 6: Non-Equi JOINs
38+
// Sub-group: Positive cases (should be converted to Anti Semi Join)
39+
"select A.* from A left join B on A.id < B.a_id where B.id is null",
40+
"select A.* from A left join B on A.id > B.a_id where B.non_null_col is null",
41+
"select B.* from A right join B on A.id > B.a_id where A.id is null",
42+
// Sub-group: Negative cases (should NOT be converted)
43+
"select A.* from A left join B on A.id > B.a_id where B.val > 0",
44+
"select A.* from A left join B on A.id > B.a_id where B.nullable_col is null",
45+
46+
// GROUP 7: JOIN on nullable & non-unique columns (A.val = B.val)
47+
"select A.* from A left join B on A.val = B.val where B.id IS NULL",
48+
"select A.* from A left join B on A.val = B.val where B.non_null_col IS NULL",
49+
"select A.* from A left join B on A.val = B.val where B.nullable_col IS NULL",
50+
"select A.* from A left join B on A.val <=> B.val where B.id IS NULL",
51+
"select A.* from A left join B on A.val <=> B.val where B.non_null_col > 0",
52+
53+
// GROUP 8: Projecting columns from both tables (Negative cases for Semi Join conversion)
54+
// These conversions should NOT happen because Semi/Anti-Semi Join cannot project columns from the inner table.
55+
"select A.*, B.val from A left join B on A.id = B.a_id where B.val > 10",
56+
"select A.id, B.val from A left join B on A.id <=> B.a_id where B.val > 0",
57+
"select A.id, B.non_null_col from A left join B on A.id = B.a_id where B.non_null_col IS NULL",
58+
"select B.id, A.val from A right join B on A.id = B.a_id where A.val > 10",
59+
"select B.val, A.id from A right join B on A.val <=> B.val where A.id IS NULL",
60+
"select A.id, B.id from A left join B on A.id < B.a_id where B.id is null",
61+
"select A.val, B.val from A left join B on A.val = B.val where B.non_null_col IS NULL",
62+
"select A.id, B.val from A left join B on A.id = B.a_id where B.val > 0"
63+
]
64+
}
65+
]

0 commit comments

Comments
 (0)