
Commit 44195cc

Fix row estimation for parallel subquery paths.
In CBDB, a path's row estimate is derived from its subpath's row count and the number of cluster segments. For a parallel subquery scan path, however, each worker actually processes fewer rows (the subpath's rows divided by parallel_workers), which the estimate did not account for. This commit corrects that. The fix not only makes the row estimate for parallel subqueries more accurate, it also lets the planner keep the rest of the plan as parallel as possible, particularly for subqueries inside complex queries.

Authored-by: Zhang Mingli <avamingli@gmail.com>
1 parent: d5967fd
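For context, here is a minimal, illustrative C sketch of the estimation idea described above; it is not the CBDB implementation. The actual change is the two-line edit to cost_subqueryscan() in costsize.c shown further down, which switches from CdbPathLocus_NumSegments to CdbPathLocus_NumSegmentsPlusParallelWorkers. The function name, parameters, and the exact divisor formula below are assumptions made for illustration; the worker scaling is inferred from the parallel regression plan below, where a 3-segment cluster with 2 workers per segment reports "segments: 6".

#include <stdio.h>
#include <stdbool.h>

/*
 * Illustrative sketch only -- not CBDB source. It mimics the idea that a
 * subquery scan's row estimate should be divided not just by the number of
 * segments but also by the number of parallel workers per segment.
 */
double
sketch_subqueryscan_rows(double subpath_rows, int numsegments,
                         int parallel_workers, bool is_partitioned)
{
    double divisor = 1.0;

    if (is_partitioned)
    {
        /* Old behaviour: divide the subpath's rows by the segment count only. */
        divisor = (double) numsegments;

        /*
         * Fixed behaviour: each parallel worker on a segment processes its own
         * share, so the divisor also reflects the workers (assumed here to
         * scale the segment count, matching the "segments: 6" plan below).
         */
        if (parallel_workers > 1)
            divisor = (double) (numsegments * parallel_workers);
    }

    /* Clamp to at least one row, as planners conventionally do. */
    double rows = subpath_rows / divisor;
    return rows < 1.0 ? 1.0 : rows;
}

int
main(void)
{
    /* 1000 subpath rows on 3 segments: roughly 333 rows per segment. */
    printf("no workers: %.0f\n", sketch_subqueryscan_rows(1000, 3, 1, true));
    /* Same subpath with 2 workers per segment: roughly 167 rows per worker. */
    printf("2 workers:  %.0f\n", sketch_subqueryscan_rows(1000, 3, 2, true));
    return 0;
}

With 1000 subpath rows, the sketch reproduces the shift visible in the regression output below: the Subquery Scan on "Expr_SUBQUERY" estimate drops from rows=333 in the 3-segment plan to rows=167 once the six worker slices are accounted for.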

6 files changed: +174 −8 lines changed


src/backend/optimizer/path/costsize.c

Lines changed: 2 additions & 2 deletions
@@ -1540,10 +1540,10 @@ cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
 	Assert(baserel->relid > 0);
 	Assert(baserel->rtekind == RTE_SUBQUERY);
 
-	/* Adjust row count if this runs in multiple segments and parallel model */
 	if (CdbPathLocus_IsPartitioned(path->path.locus))
 	{
-		numsegments = CdbPathLocus_NumSegments(path->path.locus);
+		/* Adjust row count if this runs in multiple segments and parallel model */
+		numsegments = CdbPathLocus_NumSegmentsPlusParallelWorkers(path->path.locus);
 	}
 	else
 		numsegments = 1;

src/test/regress/expected/cbdb_parallel.out

Lines changed: 104 additions & 0 deletions
@@ -3415,6 +3415,110 @@ reset enable_parallel;
 --
 -- End of test Parallel UNION
 --
+--
+-- Test Parallel Subquery.
+--
+CREATE TABLE departments (
+    department_id INT PRIMARY KEY,
+    department_name VARCHAR(100)
+);
+CREATE TABLE employees (
+    employee_id INT PRIMARY KEY,
+    name VARCHAR(100),
+    salary NUMERIC,
+    department_id INT
+);
+INSERT INTO departments VALUES
+(1, 'Sales'),
+(2, 'IT'),
+(3, 'HR');
+INSERT INTO employees VALUES
+(1, 'Alice', 5000, 1),
+(2, 'Bob', 6000, 1),
+(3, 'Charlie', 7000, 2),
+(4, 'David', 8000, 2),
+(5, 'Eve', 9000, 3);
+set enable_parallel = off;
+explain SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+                                                          QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=163.42..307.76 rows=3767 width=218)
+   ->  Hash Join  (cost=163.42..257.54 rows=1256 width=218)
+         Hash Cond: (e.department_id = "Expr_SUBQUERY".csq_c0)
+         Join Filter: (e.salary > "Expr_SUBQUERY".csq_c1)
+         ->  Seq Scan on employees e  (cost=0.00..71.67 rows=3767 width=254)
+         ->  Hash  (cost=150.92..150.92 rows=1000 width=36)
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)  (cost=130.09..150.92 rows=1000 width=36)
+                     ->  Subquery Scan on "Expr_SUBQUERY"  (cost=130.09..137.59 rows=333 width=36)
+                           ->  Finalize HashAggregate  (cost=130.09..134.26 rows=333 width=36)
+                                 Group Key: employees.department_id
+                                 ->  Redistribute Motion 3:3  (slice3; segments: 3)  (cost=90.50..122.67 rows=990 width=36)
+                                       Hash Key: employees.department_id
+                                       ->  Partial HashAggregate  (cost=90.50..102.87 rows=990 width=36)
+                                             Group Key: employees.department_id
+                                             ->  Seq Scan on employees  (cost=0.00..71.67 rows=3767 width=36)
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+ name
+-------
+ Bob
+ David
+(2 rows)
+
+set enable_parallel = on;
+set min_parallel_table_scan_size = 0;
+explain SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+                                                         QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 6:1  (slice1; segments: 6)  (cost=116.58..230.86 rows=3767 width=218)
+   ->  Parallel Hash Join  (cost=116.58..186.92 rows=628 width=218)
+         Hash Cond: (e.department_id = "Expr_SUBQUERY".csq_c0)
+         Join Filter: (e.salary > "Expr_SUBQUERY".csq_c1)
+         ->  Parallel Seq Scan on employees e  (cost=0.00..52.83 rows=1883 width=254)
+         ->  Parallel Hash  (cost=110.33..110.33 rows=500 width=36)
+               ->  Broadcast Workers Motion 6:6  (slice2; segments: 6)  (cost=99.92..110.33 rows=500 width=36)
+                     ->  Subquery Scan on "Expr_SUBQUERY"  (cost=99.92..103.67 rows=167 width=36)
+                           ->  HashAggregate  (cost=99.92..102.00 rows=167 width=36)
+                                 Group Key: employees.department_id
+                                 ->  Redistribute Motion 6:6  (slice3; segments: 6)  (cost=0.00..90.50 rows=1883 width=36)
+                                       Hash Key: employees.department_id
+                                       Hash Module: 3
+                                       ->  Parallel Seq Scan on employees  (cost=0.00..52.83 rows=1883 width=36)
+ Optimizer: Postgres query optimizer
+(15 rows)
+
+SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+ name
+-------
+ Bob
+ David
+(2 rows)
+
+
+reset enable_parallel;
+reset min_parallel_table_scan_size;
 -- start_ignore
 drop schema test_parallel cascade;
 -- end_ignore

src/test/regress/expected/incremental_sort.out

Lines changed: 2 additions & 2 deletions
@@ -589,7 +589,6 @@ select explain_analyze_without_memory('select * from (select * from t order by a
  Optimizer: Postgres query optimizer
 (13 rows)
 
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55'));
                   jsonb_pretty
 -------------------------------------------------
@@ -626,6 +625,7 @@ select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from
 ]
 (1 row)
 
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55');
  explain_analyze_inc_sort_nodes_verify_invariants
 --------------------------------------------------
@@ -815,7 +815,6 @@ select explain_analyze_without_memory('select * from (select * from t order by a
  Optimizer: Postgres query optimizer
 (14 rows)
 
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70'));
                   jsonb_pretty
 -------------------------------------------------
@@ -861,6 +860,7 @@ select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from
 ]
 (1 row)
 
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70');
  explain_analyze_inc_sort_nodes_verify_invariants
 --------------------------------------------------

src/test/regress/expected/incremental_sort_optimizer.out

Lines changed: 2 additions & 2 deletions
@@ -560,14 +560,14 @@ select explain_analyze_without_memory('select * from (select * from t order by a
  Optimizer: Pivotal Optimizer (GPORCA)
 (9 rows)
 
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55'));
  jsonb_pretty
 --------------
 [ +
 ]
 (1 row)
 
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55');
  explain_analyze_inc_sort_nodes_verify_invariants
 --------------------------------------------------
@@ -743,14 +743,14 @@ select explain_analyze_without_memory('select * from (select * from t order by a
  Optimizer: Pivotal Optimizer (GPORCA)
 (10 rows)
 
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70'));
  jsonb_pretty
 --------------
 [ +
 ]
 (1 row)
 
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70');
  explain_analyze_inc_sort_nodes_verify_invariants
 --------------------------------------------------

src/test/regress/sql/cbdb_parallel.sql

Lines changed: 62 additions & 0 deletions
@@ -1074,6 +1074,68 @@ reset enable_parallel;
 -- End of test Parallel UNION
 --
 
+--
+-- Test Parallel Subquery.
+--
+CREATE TABLE departments (
+    department_id INT PRIMARY KEY,
+    department_name VARCHAR(100)
+);
+
+CREATE TABLE employees (
+    employee_id INT PRIMARY KEY,
+    name VARCHAR(100),
+    salary NUMERIC,
+    department_id INT
+);
+
+INSERT INTO departments VALUES
+(1, 'Sales'),
+(2, 'IT'),
+(3, 'HR');
+
+INSERT INTO employees VALUES
+(1, 'Alice', 5000, 1),
+(2, 'Bob', 6000, 1),
+(3, 'Charlie', 7000, 2),
+(4, 'David', 8000, 2),
+(5, 'Eve', 9000, 3);
+
+set enable_parallel = off;
+explain SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+
+SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+
+set enable_parallel = on;
+set min_parallel_table_scan_size = 0;
+
+explain SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+
+SELECT e.name
+FROM employees e
+WHERE e.salary > (
+    SELECT AVG(salary)
+    FROM employees
+    WHERE department_id = e.department_id);
+
+reset enable_parallel;
+reset min_parallel_table_scan_size;
+
 -- start_ignore
 drop schema test_parallel cascade;
 -- end_ignore

src/test/regress/sql/incremental_sort.sql

Lines changed: 2 additions & 2 deletions
@@ -147,8 +147,8 @@ select * from (select * from t order by a) s order by a, b limit 55;
 -- Test EXPLAIN ANALYZE with only a fullsort group.
 set max_parallel_workers_per_gather = 0;
 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 55');
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 55'));
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 55');
 delete from t;
 
@@ -181,8 +181,8 @@ rollback;
 -- Test EXPLAIN ANALYZE with both fullsort and presorted groups.
 set max_parallel_workers_per_gather = 0;
 select explain_analyze_without_memory('select * from (select * from t order by a) s order by a, b limit 70');
-reset max_parallel_workers_per_gather;
 select jsonb_pretty(explain_analyze_inc_sort_nodes_without_memory('select * from (select * from t order by a) s order by a, b limit 70'));
+reset max_parallel_workers_per_gather;
 select explain_analyze_inc_sort_nodes_verify_invariants('select * from (select * from t order by a) s order by a, b limit 70');
 delete from t;
 