Skip to content

Commit 23aef44

Browse files
Haolin Wang authored and soumyadeep2007 committed
fast-analyze: implement fast ANALYZE for append-optimized tables
Prior to this patch, GPDB ANALYZE on large AO/CO tables was a time-consuming process. This is because PostgreSQL's two-stage sampling method didn't work well on AO/CO tables. GPDB had to unpack all varblocks up to the target tuples, which could easily result in an almost full table scan if the sampled tuples fell near the end of the table. Denis Smirnov <sd@picodata.io>'s PR greenplum-db#11190 introduced a `logical` block concept containing a fixed number of tuples to support PG's two-stage sampling mechanism, and it also sped up fetching target tuples by skipping decompression of varblock content. Thanks to Denis Smirnov for his great contribution! Also, thanks to Ashwin Agrawal <aashwin@vmware.com> for his advice on leveraging the AO Block Directory to locate the target sample row without scanning unnecessary varblocks, which brings another significant performance improvement when the cache is warmed up. In addition, - GPDB has an AO/CO-specific feature that stores the total tuple count in an auxiliary table, from which it can be obtained without much overhead. - GPDB has `fetch` facilities that support finding a varblock based on AOTupleId without decompressing unnecessary varblocks. Based on the above work and properties, we re-implemented AO/CO ANALYZE sampling by combining Knuth's Algorithm S and varblock skipping in this patch, to address the time-consuming problem. We didn't implement two-stage sampling for AO/CO because the total size of the data set (total tuple count) is known in advance, hence Algorithm S is sufficient to satisfy the sampling requirement.
Special thanks to Zhenghua Lyu (https://kainwen.com/) for the detailed analysis of Algorithm S: [Analysis of Algorithm S](https://kainwen.com/2022/11/06/analysis-of-algorithm-s) and the follow-up [discussion](https://stackoverflow.com/questions/74345921/performance-comparsion-algorithm-s-and-algorithm-z?noredirect=1#comment131292564_74345921). Here is a simple example to show the optimization effect: [AO with compression, with Fast Analyze enabled] create table ao (a int, b inet, c inet) with (appendonly=true, orientation=row, compresstype=zlib, compresslevel=3); insert into ao select i, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet, (select ((i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text || '.' || (i%255)::text))::inet from generate_series(1,10000000)i; insert into ao select * from ao; insert into ao select * from ao; insert into ao select * from ao; insert into ao select * from ao; insert into ao select * from ao; insert into ao select * from ao; insert into ao select * from ao; select count(*) from ao; count ------------ 1280000000 (1 row) gpadmin=# analyze ao; ANALYZE Time: 2814.939 ms (00:02.815) gpadmin=# [with block directory and caching warmed] gpadmin=# analyze ao; ANALYZE Time: 1605.342 ms (00:01.605) gpadmin=# [Legacy Analyze] gpadmin=# analyze ao; ANALYZE Time: 59711.905 ms (00:59.712) gpadmin=# [Heap without compression] create table heap (a int, b inet, c inet); insert same data set gpadmin=# analyze heap; ANALYZE Time: 2087.694 ms (00:02.088) gpadmin=# Co-authored-by: Soumyadeep Chakraborty <soumyadeep2007@gmail.com> Reviewed by: Ashwin Agrawal, Soumyadeep Chakraborty, Zhenglong Li, Qing Ma
1 parent d5967fd commit 23aef44

File tree

22 files changed

+2912
-212
lines changed

22 files changed

+2912
-212
lines changed

src/backend/access/aocs/aocsam.c

Lines changed: 457 additions & 8 deletions
Large diffs are not rendered by default.

src/backend/access/aocs/aocsam_handler.c

Lines changed: 74 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "utils/lsyscache.h"
4848
#include "utils/pg_rusage.h"
4949
#include "utils/guc.h"
50+
#include "utils/sampling.h"
5051

5152
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
5253

@@ -1623,39 +1624,92 @@ static bool
16231624
aoco_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
16241625
BufferAccessStrategy bstrategy)
16251626
{
1626-
AOCSScanDesc aoscan = (AOCSScanDesc) scan;
1627-
aoscan->targetTupleId = blockno;
1628-
1629-
return true;
1627+
/*
1628+
* For append-optimized relations, we use a separate sampling
1629+
* method. See table_relation_acquire_sample_rows().
1630+
*/
1631+
ereport(ERROR,
1632+
(errcode(ERRCODE_INTERNAL_ERROR),
1633+
errmsg("API not supported for appendoptimized relations")));
16301634
}
16311635

16321636
static bool
16331637
aoco_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
16341638
double *liverows, double *deadrows,
16351639
TupleTableSlot *slot)
16361640
{
1637-
AOCSScanDesc aoscan = (AOCSScanDesc) scan;
1638-
bool ret = false;
1641+
/*
1642+
* For append-optimized relations, we use a separate sampling
1643+
* method. See table_relation_acquire_sample_rows().
1644+
*/
1645+
ereport(ERROR,
1646+
(errcode(ERRCODE_INTERNAL_ERROR),
1647+
errmsg("API not supported for appendoptimized relations")));
1648+
}
16391649

1640-
/* skip several tuples if they are not sampling target */
1641-
while (aoscan->targetTupleId > aoscan->nextTupleId)
1642-
{
1643-
aoco_getnextslot(scan, ForwardScanDirection, slot);
1644-
aoscan->nextTupleId++;
1645-
}
1650+
static int
1651+
aoco_acquire_sample_rows(Relation onerel, int elevel, HeapTuple *rows,
1652+
int targrows, double *totalrows, double *totaldeadrows)
1653+
{
1654+
int numrows = 0; /* # rows now in reservoir */
1655+
double liverows = 0; /* # live rows seen */
1656+
double deadrows = 0; /* # dead rows seen */
16461657

1647-
if (aoscan->targetTupleId == aoscan->nextTupleId)
1658+
Assert(targrows > 0);
1659+
1660+
TableScanDesc scan = table_beginscan_analyze(onerel);
1661+
TupleTableSlot *slot = table_slot_create(onerel, NULL);
1662+
AOCSScanDesc aocoscan = (AOCSScanDesc) scan;
1663+
1664+
int64 totaltupcount = AOCSScanDesc_TotalTupCount(aocoscan);
1665+
int64 totaldeadtupcount = 0;
1666+
if (aocoscan->total_seg > 0 )
1667+
totaldeadtupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&aocoscan->visibilityMap);
1668+
/*
1669+
* The conversion from int64 to double (53 significant bits) is safe as the
1670+
* AOTupleId is 48bits, the max value of totalrows is never greater than
1671+
* AOTupleId_MaxSegmentFileNum * AOTupleId_MaxRowNum (< 48 significant bits).
1672+
*/
1673+
*totalrows = (double) (totaltupcount - totaldeadtupcount);
1674+
*totaldeadrows = (double) totaldeadtupcount;
1675+
1676+
/* Prepare for sampling tuple numbers */
1677+
RowSamplerData rs;
1678+
RowSampler_Init(&rs, *totalrows, targrows, random());
1679+
1680+
while (RowSampler_HasMore(&rs))
16481681
{
1649-
ret = aoco_getnextslot(scan, ForwardScanDirection, slot);
1650-
aoscan->nextTupleId++;
1682+
aocoscan->targrow = RowSampler_Next(&rs);
1683+
1684+
vacuum_delay_point();
16511685

1652-
if (ret)
1653-
*liverows += 1;
1686+
if (aocs_get_target_tuple(aocoscan, aocoscan->targrow, slot))
1687+
{
1688+
rows[numrows++] = ExecCopySlotHeapTuple(slot);
1689+
liverows++;
1690+
}
16541691
else
1655-
*deadrows += 1; /* if return an invisible tuple */
1692+
deadrows++;
1693+
1694+
ExecClearTuple(slot);
16561695
}
16571696

1658-
return ret;
1697+
ExecDropSingleTupleTableSlot(slot);
1698+
table_endscan(scan);
1699+
1700+
/*
1701+
* Emit some interesting relation info
1702+
*/
1703+
ereport(elevel,
1704+
(errmsg("\"%s\": scanned " INT64_FORMAT " rows, "
1705+
"containing %.0f live rows and %.0f dead rows; "
1706+
"%d rows in sample, %.0f accurate total live rows, "
1707+
"%.f accurate total dead rows",
1708+
RelationGetRelationName(onerel),
1709+
rs.m, liverows, deadrows, numrows,
1710+
*totalrows, *totaldeadrows)));
1711+
1712+
return numrows;
16591713
}
16601714

16611715
static double
@@ -2588,6 +2642,7 @@ static TableAmRoutine ao_column_methods = {
25882642
.relation_vacuum = aoco_vacuum_rel,
25892643
.scan_analyze_next_block = aoco_scan_analyze_next_block,
25902644
.scan_analyze_next_tuple = aoco_scan_analyze_next_tuple,
2645+
.relation_acquire_sample_rows = aoco_acquire_sample_rows,
25912646
.index_build_range_scan = aoco_index_build_range_scan,
25922647
.index_validate_scan = aoco_index_validate_scan,
25932648

@@ -2602,7 +2657,6 @@ static TableAmRoutine ao_column_methods = {
26022657
.scan_bitmap_next_tuple = aoco_scan_bitmap_next_tuple,
26032658
.scan_sample_next_block = aoco_scan_sample_next_block,
26042659
.scan_sample_next_tuple = aoco_scan_sample_next_tuple,
2605-
.acquire_sample_rows = acquire_sample_rows,
26062660

26072661
.amoptions = ao_amoptions,
26082662
.swap_relation_files = aoco_swap_relation_files,

0 commit comments

Comments
 (0)