Commit fe30c62

Author: Maksim Milyutin (committed)

Add parallel nodes support

1 parent f603e6c commit fe30c62

File tree: 2 files changed, +303 -1 lines


src/hooks.c

Lines changed: 12 additions & 1 deletion
@@ -324,11 +324,22 @@ pathman_rel_pathlist_hook(PlannerInfo *root,
 
 		/* Clear old path list */
 		list_free(rel->pathlist);
-
 		rel->pathlist = NIL;
+
+#if PG_VERSION_NUM >= 90600
+		/* Clear old partial path list */
+		list_free(rel->partial_pathlist);
+		rel->partial_pathlist = NIL;
+#endif
+
 		set_append_rel_pathlist(root, rel, rti, rte, pathkeyAsc, pathkeyDesc);
 		set_append_rel_size_compat(root, rel, rti, rte);
 
+#if PG_VERSION_NUM >= 90600
+		/* consider gathering partial paths for the parent appendrel */
+		generate_gather_paths(root, rel);
+#endif
+
 		/* No need to go further (both nodes are disabled), return */
 		if (!(pg_pathman_enable_runtimeappend ||
 			  pg_pathman_enable_runtime_merge_append))
src/pg_pathman.c

Lines changed: 291 additions & 0 deletions
@@ -26,6 +26,7 @@
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/pg_cast.h"
+#include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "executor/spi.h"
 #include "foreign/fdwapi.h"
@@ -98,6 +99,11 @@ static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
 									   PathKey *pathkeyAsc,
 									   PathKey *pathkeyDesc);
 static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer);
+#if PG_VERSION_NUM >= 90600
+static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
+static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
+									  RangeTblEntry *rte);
+#endif
 
 
 /*
@@ -1756,6 +1762,12 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 #endif
 	add_path(rel, path);
 
+#if PG_VERSION_NUM >= 90600
+	/* If appropriate, consider parallel sequential scan */
+	if (rel->consider_parallel && required_outer == NULL)
+		create_plain_partial_paths(root, rel);
+#endif
+
 	/* Consider index scans */
 	create_index_paths(root, rel);
 
@@ -1804,6 +1816,10 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	List	   *live_childrels = NIL;
 	List	   *subpaths = NIL;
 	bool		subpaths_valid = true;
+#if PG_VERSION_NUM >= 90600
+	List	   *partial_subpaths = NIL;
+	bool		partial_subpaths_valid = true;
+#endif
 	List	   *all_child_pathkeys = NIL;
 	List	   *all_child_outers = NIL;
 	ListCell   *l;
@@ -1831,6 +1847,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		childRTE = root->simple_rte_array[childRTindex];
 		childrel = root->simple_rel_array[childRTindex];
 
+#if PG_VERSION_NUM >= 90600
+		/*
+		 * If parallelism is allowable for this query in general and for parent
+		 * appendrel, see whether it's allowable for this childrel in
+		 * particular.
+		 *
+		 * For consistency, do this before calling set_rel_size() for the child.
+		 */
+		if (root->glob->parallelModeOK && rel->consider_parallel)
+			set_rel_consider_parallel(root, childrel, childRTE);
+#endif
+
 		/*
 		 * Compute the child's access paths.
 		 */
@@ -1857,6 +1885,18 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		 */
 		live_childrels = lappend(live_childrels, childrel);
 
+#if PG_VERSION_NUM >= 90600
+		/*
+		 * If any live child is not parallel-safe, treat the whole appendrel
+		 * as not parallel-safe.  In future we might be able to generate plans
+		 * in which some children are farmed out to workers while others are
+		 * not; but we don't have that today, so it's a waste to consider
+		 * partial paths anywhere in the appendrel unless it's all safe.
+		 */
+		if (!childrel->consider_parallel)
+			rel->consider_parallel = false;
+#endif
+
 		/*
 		 * If child has an unparameterized cheapest-total path, add that to
 		 * the unparameterized Append path we are constructing for the parent.
@@ -1868,6 +1908,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		else
 			subpaths_valid = false;
 
+#if PG_VERSION_NUM >= 90600
+		/* Same idea, but for a partial plan. */
+		if (childrel->partial_pathlist != NIL)
+			partial_subpaths = accumulate_append_subpath(partial_subpaths,
+										linitial(childrel->partial_pathlist));
+		else
+			partial_subpaths_valid = false;
+#endif
+
 		/*
 		 * Collect lists of all the available path orderings and
 		 * parameterizations for all the children.  We use these as a
@@ -1942,6 +1991,37 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	add_path(rel,
 			 (Path *) create_append_path_compat(rel, subpaths, NULL, 0));
 
+#if PG_VERSION_NUM >= 90600
+	/*
+	 * Consider an append of partial unordered, unparameterized partial paths.
+	 */
+	if (partial_subpaths_valid)
+	{
+		AppendPath *appendpath;
+		ListCell   *lc;
+		int			parallel_workers = 0;
+
+		/*
+		 * Decide on the number of workers to request for this append path.
+		 * For now, we just use the maximum value from among the members.  It
+		 * might be useful to use a higher number if the Append node were
+		 * smart enough to spread out the workers, but it currently isn't.
+		 */
+		foreach(lc, partial_subpaths)
+		{
+			Path	   *path = lfirst(lc);
+
+			parallel_workers = Max(parallel_workers, path->parallel_workers);
+		}
+		Assert(parallel_workers > 0);
+
+		/* Generate a partial append path. */
+		appendpath = create_append_path(rel, partial_subpaths, NULL,
+										parallel_workers);
+		add_partial_path(rel, (Path *) appendpath);
+	}
+#endif
+
 	/*
 	 * Also build unparameterized MergeAppend paths based on the collected
 	 * list of child pathkeys.
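
The new block requests the maximum parallel_workers found among the partial child paths rather than their sum, because (as the comment notes) the Append node cannot yet spread workers across children. A standalone illustration of that choice, with made-up per-partition worker counts, follows; it is not pg_pathman code.

#include <stdio.h>

/* Standalone sketch: an Append over partial child paths asks for the
 * maximum worker count among its members, mirroring the foreach/Max loop
 * in the diff above.  The child counts are made-up sample values. */
static int
append_parallel_workers(const int *child_workers, int nchildren)
{
	int		max = 0;
	int		i;

	for (i = 0; i < nchildren; i++)
		if (child_workers[i] > max)
			max = child_workers[i];
	return max;
}

int
main(void)
{
	int		children[] = {1, 2, 1, 3};	/* per-partition partial path workers */

	printf("append requests %d workers\n",
		   append_parallel_workers(children, 4));	/* prints 3 */
	return 0;
}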
@@ -1995,6 +2075,217 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	}
 }
 
+#if PG_VERSION_NUM >= 90600
+/*
+ * create_plain_partial_paths
+ *	  Build partial access paths for parallel scan of a plain relation
+ */
+static void
+create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+	int			parallel_workers;
+
+	/*
+	 * If the user has set the parallel_workers reloption, use that; otherwise
+	 * select a default number of workers.
+	 */
+	if (rel->rel_parallel_workers != -1)
+		parallel_workers = rel->rel_parallel_workers;
+	else
+	{
+		int			parallel_threshold;
+
+		/*
+		 * If this relation is too small to be worth a parallel scan, just
+		 * return without doing anything ... unless it's an inheritance child.
+		 * In that case, we want to generate a parallel path here anyway.  It
+		 * might not be worthwhile just for this relation, but when combined
+		 * with all of its inheritance siblings it may well pay off.
+		 */
+		if (rel->pages < (BlockNumber) min_parallel_relation_size &&
+			rel->reloptkind == RELOPT_BASEREL)
+			return;
+
+		/*
+		 * Select the number of workers based on the log of the size of the
+		 * relation.  This probably needs to be a good deal more
+		 * sophisticated, but we need something here for now.  Note that the
+		 * upper limit of the min_parallel_relation_size GUC is chosen to
+		 * prevent overflow here.
+		 */
+		parallel_workers = 1;
+		parallel_threshold = Max(min_parallel_relation_size, 1);
+		while (rel->pages >= (BlockNumber) (parallel_threshold * 3))
+		{
+			parallel_workers++;
+			parallel_threshold *= 3;
+			if (parallel_threshold > INT_MAX / 3)
+				break;			/* avoid overflow */
+		}
+	}
+
+	/*
+	 * In no case use more than max_parallel_workers_per_gather workers.
+	 */
+	parallel_workers = Min(parallel_workers, max_parallel_workers_per_gather);
+
+	/* If any limit was set to zero, the user doesn't want a parallel scan. */
+	if (parallel_workers <= 0)
+		return;
+
+	/* Add an unordered partial path based on a parallel sequential scan. */
+	add_partial_path(rel, create_seqscan_path(root, rel, NULL, parallel_workers));
+}
+
+/*
+ * If this relation could possibly be scanned from within a worker, then set
+ * its consider_parallel flag.
+ */
+static void
+set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel,
+						  RangeTblEntry *rte)
+{
+	/*
+	 * The flag has previously been initialized to false, so we can just
+	 * return if it becomes clear that we can't safely set it.
+	 */
+	Assert(!rel->consider_parallel);
+
+	/* Don't call this if parallelism is disallowed for the entire query. */
+	Assert(root->glob->parallelModeOK);
+
+	/* This should only be called for baserels and appendrel children. */
+	Assert(rel->reloptkind == RELOPT_BASEREL ||
+		   rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+
+	/* Assorted checks based on rtekind. */
+	switch (rte->rtekind)
+	{
+		case RTE_RELATION:
+
+			/*
+			 * Currently, parallel workers can't access the leader's temporary
+			 * tables.  We could possibly relax this if we wrote all of its
+			 * local buffers at the start of the query and made no changes
+			 * thereafter (maybe we could allow hint bit changes), and if we
+			 * taught the workers to read them.  Writing a large number of
+			 * temporary buffers could be expensive, though, and we don't have
+			 * the rest of the necessary infrastructure right now anyway.  So
+			 * for now, bail out if we see a temporary table.
+			 */
+			if (get_rel_persistence(rte->relid) == RELPERSISTENCE_TEMP)
+				return;
+
+			/*
+			 * Table sampling can be pushed down to workers if the sample
+			 * function and its arguments are safe.
+			 */
+			if (rte->tablesample != NULL)
+			{
+				Oid			proparallel = func_parallel(rte->tablesample->tsmhandler);
+
+				if (proparallel != PROPARALLEL_SAFE)
+					return;
+				if (has_parallel_hazard((Node *) rte->tablesample->args,
+										false))
+					return;
+			}
+
+			/*
+			 * Ask FDWs whether they can support performing a ForeignScan
+			 * within a worker.  Most often, the answer will be no.  For
+			 * example, if the nature of the FDW is such that it opens a TCP
+			 * connection with a remote server, each parallel worker would end
+			 * up with a separate connection, and these connections might not
+			 * be appropriately coordinated between workers and the leader.
+			 */
+			if (rte->relkind == RELKIND_FOREIGN_TABLE)
+			{
+				Assert(rel->fdwroutine);
+				if (!rel->fdwroutine->IsForeignScanParallelSafe)
+					return;
+				if (!rel->fdwroutine->IsForeignScanParallelSafe(root, rel, rte))
+					return;
+			}
+
+			/*
+			 * There are additional considerations for appendrels, which we'll
+			 * deal with in set_append_rel_size and set_append_rel_pathlist.
+			 * For now, just set consider_parallel based on the rel's own
+			 * quals and targetlist.
+			 */
+			break;
+
+		case RTE_SUBQUERY:
+
+			/*
+			 * There's no intrinsic problem with scanning a subquery-in-FROM
+			 * (as distinct from a SubPlan or InitPlan) in a parallel worker.
+			 * If the subquery doesn't happen to have any parallel-safe paths,
+			 * then flagging it as consider_parallel won't change anything,
+			 * but that's true for plain tables, too.  We must set
+			 * consider_parallel based on the rel's own quals and targetlist,
+			 * so that if a subquery path is parallel-safe but the quals and
+			 * projection we're sticking onto it are not, we correctly mark
+			 * the SubqueryScanPath as not parallel-safe.  (Note that
+			 * set_subquery_pathlist() might push some of these quals down
+			 * into the subquery itself, but that doesn't change anything.)
+			 */
+			break;
+
+		case RTE_JOIN:
+			/* Shouldn't happen; we're only considering baserels here. */
+			Assert(false);
+			return;
+
+		case RTE_FUNCTION:
+			/* Check for parallel-restricted functions. */
+			if (has_parallel_hazard((Node *) rte->functions, false))
+				return;
+			break;
+
+		case RTE_VALUES:
+			/* Check for parallel-restricted functions. */
+			if (has_parallel_hazard((Node *) rte->values_lists, false))
+				return;
+			break;
+
+		case RTE_CTE:
+
+			/*
+			 * CTE tuplestores aren't shared among parallel workers, so we
+			 * force all CTE scans to happen in the leader.  Also, populating
+			 * the CTE would require executing a subplan that's not available
+			 * in the worker, might be parallel-restricted, and must get
+			 * executed only once.
+			 */
+			return;
+	}
+
+	/*
+	 * If there's anything in baserestrictinfo that's parallel-restricted, we
+	 * give up on parallelizing access to this relation.  We could consider
+	 * instead postponing application of the restricted quals until we're
+	 * above all the parallelism in the plan tree, but it's not clear that
+	 * that would be a win in very many cases, and it might be tricky to make
+	 * outer join clauses work correctly.  It would likely break equivalence
+	 * classes, too.
+	 */
+	if (has_parallel_hazard((Node *) rel->baserestrictinfo, false))
+		return;
+
+	/*
+	 * Likewise, if the relation's outputs are not parallel-safe, give up.
+	 * (Usually, they're just Vars, but sometimes they're not.)
+	 */
+	if (has_parallel_hazard((Node *) rel->reltarget->exprs, false))
+		return;
+
+	/* We have a winner. */
+	rel->consider_parallel = true;
+}
+#endif
+
 static List *
 accumulate_append_subpath(List *subpaths, Path *path)
 {
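
The worker-count heuristic in create_plain_partial_paths() above adds one worker each time the relation is at least three times larger than the previous threshold, i.e. roughly log base 3 of the table size in pages relative to min_parallel_relation_size, and the result is then clamped to max_parallel_workers_per_gather. The standalone sketch below reproduces just that arithmetic with an assumed threshold of 1024 pages (8 MB at the default block size); it is an illustration, not the extension's code, and it omits the inheritance-child exception for small relations.

#include <limits.h>
#include <stdio.h>

/* Standalone sketch of the log-base-3 worker heuristic.  The threshold
 * argument stands in for the min_parallel_relation_size GUC; 1024 pages
 * is an assumed value for this example. */
static int
heuristic_workers(unsigned int pages, int threshold)
{
	int		parallel_workers = 1;
	int		parallel_threshold = (threshold > 1) ? threshold : 1;

	if (pages < (unsigned int) parallel_threshold)
		return 0;				/* too small: no parallel scan */

	while (pages >= (unsigned int) (parallel_threshold * 3))
	{
		parallel_workers++;
		parallel_threshold *= 3;
		if (parallel_threshold > INT_MAX / 3)
			break;				/* avoid overflow */
	}
	return parallel_workers;
}

int
main(void)
{
	/* 0 workers below 1024 pages, 1 at 1024, 2 at 3072, 3 at 9216, ... */
	unsigned int sizes[] = {512, 1024, 4000, 10000, 30000};
	int		i;

	for (i = 0; i < 5; i++)
		printf("%u pages -> %d workers\n", sizes[i],
			   heuristic_workers(sizes[i], 1024));
	return 0;
}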
