Skip to content

Commit 68c758b

Browse files
SmartKeyerror authored and my-ship-it committed
fix tids are not in order when building bitmap index (#15271)
When we build a bitmap index on a heap table, we need to scan all the tuples. Usually the scan starts at block 0 and proceeds to the end. But if sync scan is allowed, the table is scanned circularly, from block X up to the end and then from block 0 to X-1, to ensure we visit all rows while still participating in the common scan. This can cause the TIDs to arrive out of order while building the bitmap index, which throws an error in the current implementation. So this PR forbids sync scan when building a bitmap index. In addition, when we build a bitmap index on a heap table there may be HOT chains, for which the scan returns the root tuple's TID, so the scanned TIDs can still be out of order. For that case, this PR removes the "tids are not in order" error and instead flushes the buffered bitmap words and sets the bit for the out-of-order TID directly in the existing bitmap vector.
1 parent 8ef6dec commit 68c758b

File tree

5 files changed

+98
-18
lines changed

5 files changed

+98
-18
lines changed

src/backend/access/bitmap/bitmap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ bmbuild(Relation heap, Relation index, IndexInfo *indexInfo)
145145
_bitmap_init_buildstate(index, &bmstate);
146146

147147
/* do the heap scan */
148-
reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
148+
reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
149149
bmbuildCallback, (void *) &bmstate,
150150
NULL);
151151
/* clean up the build state */

src/backend/access/bitmap/bitmapinsert.c

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1790,8 +1790,24 @@ buf_add_tid(Relation rel, BMTidBuildBuf *tids, uint64 tidnum,
17901790
buf = lov_buf->bufs[off - 1];
17911791

17921792
Buffer lovbuf = _bitmap_getbuf(rel, lov_block, BM_WRITE);
1793-
buf_add_tid_with_fill(rel, buf, lovbuf, off,
1794-
tidnum, state->use_wal);
1793+
1794+
if (tidnum < buf->last_tid)
1795+
{
1796+
/*
1797+
* Usually, tidnum is greater than lovItem->bm_last_setbit.
1798+
* However, if we build bitmap index on a heap table, there could
1799+
* have HOT-chain, and it'll return the root tuple's tid, which could
1800+
* lead to the TIDs we scanned are not in order, so we need to scan
1801+
* through the bitmap vector, and update the bit in tidnum directly.
1802+
*/
1803+
_bitmap_write_new_bitmapwords(rel, lovbuf, off, buf, state->use_wal);
1804+
_bitmap_free_tidbuf(buf);
1805+
1806+
updatesetbit(rel, lovbuf, off, tidnum, state->use_wal);
1807+
}
1808+
else
1809+
buf_add_tid_with_fill(rel, buf, lovbuf, off, tidnum, state->use_wal);
1810+
17951811
_bitmap_relbuf(lovbuf);
17961812
}
17971813
else
@@ -1819,8 +1835,7 @@ buf_add_tid(Relation rel, BMTidBuildBuf *tids, uint64 tidnum,
18191835

18201836
buf->curword = 0;
18211837

1822-
buf_add_tid_with_fill(rel, buf, lovbuf, off, tidnum,
1823-
state->use_wal);
1838+
buf_add_tid_with_fill(rel, buf, lovbuf, off, tidnum, state->use_wal);
18241839

18251840
_bitmap_relbuf(lovbuf);
18261841

@@ -1851,19 +1866,6 @@ buf_add_tid_with_fill(Relation rel, BMTIDBuffer *buf,
18511866
*/
18521867
zeros = tidnum - buf->last_tid - 1;
18531868

1854-
/*
1855-
* If zeros is less than 0, the incoming tids are not
1856-
* sorted. Currently, this is not allowed.
1857-
*/
1858-
if (zeros < 0)
1859-
ereport(ERROR,
1860-
(errcode(ERRCODE_INTERNAL_ERROR),
1861-
errmsg("tids are not in order when building bitmap index %u (relfilenode %u/%u/%lu):"
1862-
" new tidnum " INT64_FORMAT ", last tidnum " INT64_FORMAT,
1863-
RelationGetRelid(rel),
1864-
rel->rd_node.spcNode, rel->rd_node.dbNode, rel->rd_node.relNode,
1865-
tidnum, buf->last_tid)));
1866-
18671869
if (zeros > 0)
18681870
{
18691871
uint64 zerosNeeded;

src/test/regress/expected/bitmap_index.out

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,3 +1061,30 @@ select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_con
10611061
reset optimizer_enable_tablescan;
10621062
reset optimizer_enable_dynamictablescan;
10631063
drop table bmunion;
1064+
-- test create bitmap index and there have HOT chains.
1065+
drop table if exists bm_test;
1066+
NOTICE: table "bm_test" does not exist, skipping
1067+
create table bm_test(a int, b int);
1068+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
1069+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
1070+
-- insert some data into a one segment
1071+
insert into bm_test values (1, 1);
1072+
insert into bm_test values (1, 2);
1073+
insert into bm_test values (1, 3);
1074+
insert into bm_test values (12, 1);
1075+
-- update the first tuple using HOT, since this page
1076+
-- just have 4 tuples, there have full free space to
1077+
-- use HOT update.
1078+
update bm_test set b = 1 where a = 1 and b = 1;
1079+
-- After the update, the tids that the value of b is equal to 1
1080+
-- we scanned will not be in order, due to HOT.
1081+
create index idx_bm_test on bm_test using bitmap(b);
1082+
select * from bm_test where b = 1;
1083+
a | b
1084+
----+---
1085+
1 | 1
1086+
12 | 1
1087+
(2 rows)
1088+
1089+
-- clean up
1090+
drop table bm_test;

src/test/regress/expected/bitmap_index_optimizer.out

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,3 +1072,30 @@ select gp_inject_fault('simulate_bitmap_and', 'reset', dbid) from gp_segment_con
10721072
reset optimizer_enable_tablescan;
10731073
reset optimizer_enable_dynamictablescan;
10741074
drop table bmunion;
1075+
-- test create bitmap index and there have HOT chains.
1076+
drop table if exists bm_test;
1077+
NOTICE: table "bm_test" does not exist, skipping
1078+
create table bm_test(a int, b int);
1079+
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
1080+
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
1081+
-- insert some data into a one segment
1082+
insert into bm_test values (1, 1);
1083+
insert into bm_test values (1, 2);
1084+
insert into bm_test values (1, 3);
1085+
insert into bm_test values (12, 1);
1086+
-- update the first tuple using HOT, since this page
1087+
-- just have 4 tuples, there have full free space to
1088+
-- use HOT update.
1089+
update bm_test set b = 1 where a = 1 and b = 1;
1090+
-- After the update, the tids that the value of b is equal to 1
1091+
-- we scanned will not be in order, due to HOT.
1092+
create index idx_bm_test on bm_test using bitmap(b);
1093+
select * from bm_test where b = 1;
1094+
a | b
1095+
----+---
1096+
1 | 1
1097+
12 | 1
1098+
(2 rows)
1099+
1100+
-- clean up
1101+
drop table bm_test;

src/test/regress/sql/bitmap_index.sql

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,3 +449,27 @@ reset optimizer_enable_tablescan;
449449
reset optimizer_enable_dynamictablescan;
450450

451451
drop table bmunion;
452+
453+
454+
-- test create bitmap index and there have HOT chains.
455+
drop table if exists bm_test;
456+
create table bm_test(a int, b int);
457+
458+
-- insert some data into a one segment
459+
insert into bm_test values (1, 1);
460+
insert into bm_test values (1, 2);
461+
insert into bm_test values (1, 3);
462+
insert into bm_test values (12, 1);
463+
464+
-- update the first tuple using HOT, since this page
465+
-- just have 4 tuples, there have full free space to
466+
-- use HOT update.
467+
update bm_test set b = 1 where a = 1 and b = 1;
468+
469+
-- After the update, the tids that the value of b is equal to 1
470+
-- we scanned will not be in order, due to HOT.
471+
create index idx_bm_test on bm_test using bitmap(b);
472+
select * from bm_test where b = 1;
473+
474+
-- clean up
475+
drop table bm_test;

0 commit comments

Comments
 (0)