Skip to content

Commit 9d3014e

Browse files
authored
refactor: avoid alloc extra pages for small token in sealed segment (#95)
Resolves #94. This PR refactors the posting list serialization to avoid allocating skip_info and block_data pages for small tokens in sealed segments. For the 1M MS MARCO dataset, it reduces the index size from 16G to 4.7G. --------- Signed-off-by: Mingzhuo Yin <[email protected]>
1 parent da0908a commit 9d3014e

File tree

16 files changed

+575
-408
lines changed

16 files changed

+575
-408
lines changed

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ pg_test = ["dep:rand"]
2121

2222
[dependencies]
2323
arrayvec = "0.7.6"
24-
bitflags = "2.9.0"
24+
bitflags = "2.10.0"
2525
bitpacking = { version = "0.9", default-features = false, features = [
2626
"bitpacker4x",
2727
] }
28-
bytemuck = "1.18"
29-
generator = "0.8.4"
28+
bytemuck = "1.24"
29+
generator = "0.8.7"
3030
lending-iterator = "0.1.7"
3131
pgrx = "=0.16.1"
3232
serde = { version = "1.0.217", features = ["derive"] }

src/algorithm/block_wand.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ impl SealedScorer {
2323
delete_bitmap_reader: &'a DeleteBitmapReader,
2424
) -> impl Iterator<Item = (f32, u32)> + 'a {
2525
let mut scorer = self;
26-
let g = generator::Gn::new_scoped_local(move |mut s| {
26+
generator::Gn::new_scoped_local(move |mut s| {
2727
loop {
2828
scorer.posting.decode_block();
2929
loop {
@@ -44,8 +44,7 @@ impl SealedScorer {
4444
}
4545
}
4646
done!()
47-
});
48-
g
47+
})
4948
}
5049
}
5150

src/index/build.rs

Lines changed: 13 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,19 @@ use crate::{
66
page_alloc, page_alloc_init_forknum, page_write, PageFlags, VirtualPageWriter,
77
METAPAGE_BLKNO,
88
},
9-
segment::{
10-
builder::IndexBuilder,
11-
meta::{MetaPageData, META_VERSION},
12-
sealed::SealedSegmentData,
13-
},
9+
segment::{builder::IndexBuilder, meta::MetaPageData},
1410
};
1511

1612
#[pgrx::pg_guard]
1713
pub extern "C-unwind" fn ambuildempty(index: pgrx::pg_sys::Relation) {
1814
let mut meta_page = page_alloc_init_forknum(index, PageFlags::META);
1915
assert_eq!(meta_page.blkno(), METAPAGE_BLKNO);
20-
let field_norm_blkno = VirtualPageWriter::init_fork(index, PageFlags::FIELD_NORM);
21-
let term_stat_blkno = VirtualPageWriter::init_fork(index, PageFlags::TERM_STATISTIC);
22-
let payload_blkno = VirtualPageWriter::init_fork(index, PageFlags::PAYLOAD);
23-
let delete_bitmap_blkno = VirtualPageWriter::init_fork(index, PageFlags::DELETE);
2416

25-
let ptr = meta_page.content.as_mut_ptr() as *mut MetaPageData;
26-
unsafe {
27-
ptr.write(MetaPageData {
28-
version: META_VERSION,
29-
doc_cnt: 0,
30-
doc_term_cnt: 0,
31-
term_id_cnt: 0,
32-
field_norm_blkno,
33-
payload_blkno,
34-
term_stat_blkno,
35-
delete_bitmap_blkno,
36-
current_doc_id: 0,
37-
sealed_doc_id: 0,
38-
growing_segment: None,
39-
sealed_segment: SealedSegmentData {
40-
term_info_blkno: pgrx::pg_sys::InvalidBlockNumber,
41-
term_id_cnt: 0,
42-
},
43-
});
44-
meta_page.header.pd_lower += std::mem::size_of::<MetaPageData>() as u16;
45-
}
17+
let meta: &mut MetaPageData = meta_page.init_mut();
18+
meta.field_norm_blkno = VirtualPageWriter::init_fork(index, PageFlags::FIELD_NORM);
19+
meta.term_stat_blkno = VirtualPageWriter::init_fork(index, PageFlags::TERM_STATISTIC);
20+
meta.payload_blkno = VirtualPageWriter::init_fork(index, PageFlags::PAYLOAD);
21+
meta.delete_bitmap_blkno = VirtualPageWriter::init_fork(index, PageFlags::DELETE);
4622
}
4723

4824
struct BuildState {
@@ -114,29 +90,14 @@ fn write_down(state: &BuildState) {
11490
let doc_cnt = state.builder.doc_cnt();
11591
let doc_term_cnt = state.builder.doc_term_cnt();
11692
let term_id_cnt = state.builder.term_id_cnt();
93+
11794
let mut meta_page = page_write(state.index, METAPAGE_BLKNO);
118-
let ptr = meta_page.content.as_mut_ptr() as *mut MetaPageData;
119-
unsafe {
120-
ptr.write(MetaPageData {
121-
version: META_VERSION,
122-
doc_cnt,
123-
doc_term_cnt,
124-
term_id_cnt,
125-
field_norm_blkno: pgrx::pg_sys::InvalidBlockNumber,
126-
payload_blkno: pgrx::pg_sys::InvalidBlockNumber,
127-
term_stat_blkno: pgrx::pg_sys::InvalidBlockNumber,
128-
delete_bitmap_blkno: pgrx::pg_sys::InvalidBlockNumber,
129-
current_doc_id: doc_cnt,
130-
sealed_doc_id: doc_cnt,
131-
growing_segment: None,
132-
sealed_segment: SealedSegmentData {
133-
term_info_blkno: pgrx::pg_sys::InvalidBlockNumber,
134-
term_id_cnt,
135-
},
136-
});
137-
meta_page.header.pd_lower += std::mem::size_of::<MetaPageData>() as u16;
138-
}
139-
let meta: &mut MetaPageData = meta_page.as_mut();
95+
let meta: &mut MetaPageData = meta_page.init_mut();
96+
meta.doc_cnt = doc_cnt;
97+
meta.doc_term_cnt = doc_term_cnt;
98+
meta.term_id_cnt = term_id_cnt;
99+
meta.current_doc_id = doc_cnt;
100+
meta.sealed_doc_id = doc_cnt;
140101

141102
// delete bitmap
142103
let mut delete_bitmap_writer = VirtualPageWriter::new(state.index, PageFlags::DELETE, true);

src/index/vacuum.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use lending_iterator::LendingIterator;
22

33
use crate::{
4-
page::{bm25_page_size, page_read, page_write, METAPAGE_BLKNO},
4+
page::{page_read, page_write, BM25_PAGE_SIZE, METAPAGE_BLKNO},
55
segment::{
66
delete::DeleteBitmapReader,
77
field_norm::{FieldNormRead, FieldNormReader},
@@ -45,7 +45,7 @@ pub unsafe extern "C-unwind" fn ambulkdelete(
4545
let mut delete_bitmap_reader = DeleteBitmapReader::new(index, meta.delete_bitmap_blkno);
4646

4747
for i in 0..meta.current_doc_id {
48-
if i % bm25_page_size() as u32 == 0 {
48+
if i % BM25_PAGE_SIZE as u32 == 0 {
4949
#[cfg(not(feature = "pg18"))]
5050
pgrx::pg_sys::vacuum_delay_point();
5151
#[cfg(feature = "pg18")]

src/page/inspector.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
use crate::segment::{
2+
meta::MetaPageData,
3+
posting::{PostingTermMetaData, SkipBlock},
4+
};
5+
6+
use super::{page_read, PageFlags};
7+
8+
#[pgrx::pg_extern]
9+
fn bm25_page_inspect(index: pgrx::PgRelation, blkno: i32) -> String {
10+
let page = page_read(index.as_ptr(), blkno.try_into().unwrap());
11+
match page.opaque.page_flag {
12+
PageFlags::META => {
13+
let meta_page: &MetaPageData = page.as_ref();
14+
format!("Meta Page:\n{:#?}", meta_page)
15+
}
16+
PageFlags::PAYLOAD => {
17+
let data: &[u64] = bytemuck::cast_slice(page.data());
18+
format!("Payload Page ({} entries):\n{:?}", data.len(), data)
19+
}
20+
PageFlags::FIELD_NORM => {
21+
let data: &[u8] = page.data();
22+
format!("Field Norm Page ({} entries):\n{:?}", data.len(), data)
23+
}
24+
PageFlags::TERM_STATISTIC => {
25+
let data: &[u32] = bytemuck::cast_slice(page.data());
26+
format!("Term Statistic Page ({} entries):\n{:?}", data.len(), data)
27+
}
28+
PageFlags::TERM_INFO => {
29+
let data: &[u32] = bytemuck::cast_slice(page.data());
30+
format!("Term Info Page ({} entries):\n{:?}", data.len(), data)
31+
}
32+
PageFlags::TERM_META => {
33+
let term_meta: &PostingTermMetaData = page.as_ref();
34+
format!("Term Meta Page:\n{:#?}", term_meta)
35+
}
36+
PageFlags::SKIP_INFO => {
37+
let data: &[SkipBlock] = bytemuck::cast_slice(page.data());
38+
format!("Skip Info Page ({} entries):\n{:?}", data.len(), data)
39+
}
40+
PageFlags::BLOCK_DATA => {
41+
let data: &[u8] = page.data();
42+
format!("Block Data Page ({} bytes):\n{:02X?}", data.len(), data)
43+
}
44+
PageFlags::GROWING => {
45+
let data: &[u8] = page.data();
46+
format!(
47+
"Growing Segment Page ({} bytes):\n{:02X?}",
48+
data.len(),
49+
data
50+
)
51+
}
52+
PageFlags::GROWING_REDIRECT => {
53+
let data: &[u8] = page.data();
54+
format!(
55+
"Growing Segment Redirect Page ({} bytes):\n{:02X?}",
56+
data.len(),
57+
data
58+
)
59+
}
60+
PageFlags::DELETE => {
61+
let data: &[u8] = page.data();
62+
format!("Delete Bitmap Page ({} bytes):\n{:02X?}", data.len(), data)
63+
}
64+
PageFlags::VIRTUAL_INODE => {
65+
let data: &[u32] = bytemuck::cast_slice(page.data());
66+
format!("Virtual Inode Page ({} entries):\n{:?}", data.len(), data)
67+
}
68+
PageFlags::FREE => "Free Page".to_string(),
69+
_ => {
70+
let data: &[u8] = page.data();
71+
format!(
72+
"Unknown Page Flag {:?} ({} bytes):\n{:02X?}",
73+
page.opaque.page_flag,
74+
data.len(),
75+
data
76+
)
77+
}
78+
}
79+
}

src/page/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod inspector;
12
mod postgres;
23
mod reader;
34
mod r#virtual;

src/page/postgres.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use std::{
44
};
55

66
const _: () = {
7-
assert!(std::mem::size_of::<pgrx::pg_sys::PageHeaderData>() % 8 == 0);
8-
assert!(std::mem::size_of::<Bm25PageOpaqueData>() % 8 == 0);
7+
assert!(std::mem::size_of::<pgrx::pg_sys::PageHeaderData>().is_multiple_of(8));
8+
assert!(std::mem::size_of::<Bm25PageOpaqueData>().is_multiple_of(8));
99
assert!(std::mem::size_of::<PageData>() == pgrx::pg_sys::BLCKSZ as usize);
1010
};
1111

@@ -14,7 +14,7 @@ pub const METAPAGE_BLKNO: pgrx::pg_sys::BlockNumber = 0;
1414
pub const BM25_PAGE_ID: u16 = 0xFF88;
1515

1616
bitflags::bitflags! {
17-
#[derive(Debug, Clone, Copy)]
17+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1818
pub struct PageFlags: u16 {
1919
const META = 1 << 0;
2020
const PAYLOAD = 1 << 1;
@@ -27,15 +27,16 @@ bitflags::bitflags! {
2727
const GROWING = 1 << 8;
2828
const DELETE = 1 << 9;
2929
const GROWING_REDIRECT = 1 << 10;
30+
const VIRTUAL_INODE = 1 << 11;
3031
const FREE = 1 << 15;
3132
}
3233
}
3334

34-
pub const fn bm25_page_size() -> usize {
35+
pub const BM25_PAGE_SIZE: usize = {
3536
pgrx::pg_sys::BLCKSZ as usize
3637
- std::mem::size_of::<pgrx::pg_sys::PageHeaderData>()
3738
- std::mem::size_of::<Bm25PageOpaqueData>()
38-
}
39+
};
3940

4041
#[repr(C, align(8))]
4142
pub struct Bm25PageOpaqueData {
@@ -47,7 +48,7 @@ pub struct Bm25PageOpaqueData {
4748
#[repr(C, align(8))]
4849
pub struct PageData {
4950
pub header: pgrx::pg_sys::PageHeaderData,
50-
pub content: [u8; bm25_page_size()],
51+
pub content: [u8; BM25_PAGE_SIZE],
5152
pub opaque: Bm25PageOpaqueData,
5253
}
5354

@@ -93,7 +94,7 @@ impl PageData {
9394
impl<T> AsRef<T> for PageData {
9495
fn as_ref(&self) -> &T {
9596
const {
96-
assert!(std::mem::size_of::<T>() <= bm25_page_size());
97+
assert!(std::mem::size_of::<T>() <= BM25_PAGE_SIZE);
9798
}
9899
unsafe { &*(self.content.as_ptr() as *const T) }
99100
}
@@ -102,7 +103,7 @@ impl<T> AsRef<T> for PageData {
102103
impl<T> AsMut<T> for PageData {
103104
fn as_mut(&mut self) -> &mut T {
104105
const {
105-
assert!(std::mem::size_of::<T>() <= bm25_page_size());
106+
assert!(std::mem::size_of::<T>() <= BM25_PAGE_SIZE);
106107
}
107108
unsafe { &mut *(self.content.as_mut_ptr() as *mut T) }
108109
}
@@ -201,6 +202,16 @@ impl PageWriteGuard {
201202
PageReadGuard { buf, page }
202203
}
203204
}
205+
206+
pub fn init_mut<T: Default>(&mut self) -> &mut T {
207+
assert!(std::mem::size_of::<T>() <= BM25_PAGE_SIZE);
208+
let ptr = self.content.as_mut_ptr() as *mut T;
209+
unsafe {
210+
ptr.write(T::default());
211+
}
212+
self.header.pd_lower += std::mem::size_of::<T>() as u16;
213+
unsafe { &mut *ptr }
214+
}
204215
}
205216

206217
impl Deref for PageWriteGuard {
@@ -472,7 +483,7 @@ bitflags::bitflags! {
472483
}
473484

474485
pub fn page_append_item(page: &mut PageData, item: &[u8], redirect: bool) -> bool {
475-
if item.len() > bm25_page_size() {
486+
if item.len() > BM25_PAGE_SIZE {
476487
return false;
477488
}
478489

src/page/reader.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::page::bm25_page_size;
1+
use crate::page::BM25_PAGE_SIZE;
22

33
use super::{page_read, page_write, PageReadGuard};
44

@@ -9,6 +9,11 @@ pub struct ContinuousPageReader<T> {
99
}
1010

1111
impl<T: Copy> ContinuousPageReader<T> {
12+
const PAGE_COUNT: u32 = {
13+
assert!(std::mem::align_of::<T>() <= 8);
14+
(BM25_PAGE_SIZE / std::mem::size_of::<T>()) as u32
15+
};
16+
1217
pub fn new(index: pgrx::pg_sys::Relation, start_blkno: pgrx::pg_sys::BlockNumber) -> Self {
1318
Self {
1419
index,
@@ -18,27 +23,22 @@ impl<T: Copy> ContinuousPageReader<T> {
1823
}
1924

2025
pub fn read(&self, idx: u32) -> T {
21-
let blkno_offset = idx / Self::page_count() as u32;
26+
let blkno_offset = idx / Self::PAGE_COUNT;
2227
let blkno = self.start_blkno + blkno_offset as pgrx::pg_sys::BlockNumber;
23-
let offset = (idx % Self::page_count() as u32) as usize;
28+
let offset = (idx % Self::PAGE_COUNT) as usize;
2429
let page = page_read(self.index, blkno);
2530
unsafe { page.data().as_ptr().cast::<T>().add(offset).read() }
2631
}
2732

2833
pub fn update(&self, idx: u32, f: impl FnOnce(&mut T)) {
29-
let blkno_offset = idx / Self::page_count() as u32;
34+
let blkno_offset = idx / Self::PAGE_COUNT;
3035
let blkno = self.start_blkno + blkno_offset as pgrx::pg_sys::BlockNumber;
31-
let offset = (idx % Self::page_count() as u32) as usize;
36+
let offset = (idx % Self::PAGE_COUNT) as usize;
3237
let mut page = page_write(self.index, blkno);
3338
let data = page.data_mut();
3439
let ptr = unsafe { data.as_mut_ptr().cast::<T>().add(offset) };
3540
f(unsafe { &mut *ptr });
3641
}
37-
38-
const fn page_count() -> usize {
39-
assert!(std::mem::align_of::<T>() <= 8);
40-
bm25_page_size() / std::mem::size_of::<T>()
41-
}
4242
}
4343

4444
pub struct PageReader {

0 commit comments

Comments
 (0)