Skip to content

Commit 29d2a2a

Browse files
authored
feat: use ColumnEncoding_Kind_DIRECT_DELTA as default in offset stream (#1337)
* feat: use ColumnEncoding_Kind_DIRECT_DELTA as default in offset stream

Optimize performance of variable-length column offsets by switching from Zstd to delta encoding. This approach better compresses incremental integer sequences, cutting disk space by more than half while maintaining performance.

The following is a comparison of file sizes for different encoding methods on TPC-DS 20G:

| Name | PAX(ZSTD) | AOCS_SIZE | PAX(Delta) | PAX SIZE / AOCS * 100% |
|---|---|---|---|---|
| call_center | 12 kB | 231 kB | 10185 bytes | 4.31% |
| catalog_page | 499 kB | 653 kB | 393 kB | 60.18% |
| catalog_returns | 240 MB | 171 MB | 178 MB | 104.09% |
| catalog_sales | 3033 MB | 1837 MB | 1977 MB | 107.63% |
| customer | 16 MB | 12 MB | 12 MB | 100.00% |
| customer_address | 7008 kB | 3161 kB | 3115 kB | 98.54% |
| customer_demographics | 28 MB | 8164 kB | 9292 kB | 113.82% |
| date_dim | 3193 kB | 1406 kB | 1249 kB | 88.85% |
| household_demographics | 42 kB | 248 kB | 28 kB | 11.29% |
| income_band | 1239 bytes | 225 kB | 1239 bytes | 0.54% |
| inventory | 36 MB | 71 MB | 36 MB | 50.70% |
| item | 3084 kB | 2479 kB | 2227 kB | 89.84% |
| promotion | 27 kB | 239 kB | 18 kB | 7.53% |
| reason | 2730 bytes | 226 kB | 2280 bytes | 0.99% |
| ship_mode | 3894 bytes | 227 kB | 3315 bytes | 1.43% |
| store | 23 kB | 239 kB | 18 kB | 7.53% |
| store_returns | 400 MB | 265 MB | 277 MB | 104.53% |
| store_sales | 4173 MB | 2384 MB | 2554 MB | 107.12% |
| time_dim | 1702 kB | 819 kB | 627 kB | 76.56% |
| warehouse | 5394 bytes | 227 kB | 4698 bytes | 2.02% |
| web_page | 21 kB | 236 kB | 14 kB | 5.93% |
| web_returns | 116 MB | 83 MB | 85 MB | 102.41% |
| web_sales | 1513 MB | 908 MB | 982 MB | 108.15% |
1 parent ebc52ca commit 29d2a2a

25 files changed

+1989
-70
lines changed

contrib/pax_storage/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
Thumbs.db
1313

1414
# Temp files dir
15+
bench_data
1516
.tmp/**
1617
build*/**
1718
results/**

contrib/pax_storage/src/cpp/cmake/pax.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ set(pax_storage_src
5151
storage/columns/pax_dict_encoding.cc
5252
storage/columns/pax_decoding.cc
5353
storage/columns/pax_encoding.cc
54+
storage/columns/pax_delta_encoding.cc
5455
storage/columns/pax_rlev2_decoding.cc
5556
storage/columns/pax_rlev2_encoding.cc
5657
storage/columns/pax_vec_bitpacked_column.cc

contrib/pax_storage/src/cpp/cmake/pax_format.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ set(pax_storage_src
4141
storage/columns/pax_dict_encoding.cc
4242
storage/columns/pax_decoding.cc
4343
storage/columns/pax_encoding.cc
44+
storage/columns/pax_delta_encoding.cc
4445
storage/columns/pax_rlev2_decoding.cc
4546
storage/columns/pax_rlev2_encoding.cc
4647
storage/columns/pax_vec_column.cc

contrib/pax_storage/src/cpp/pax_gbench.cc

Lines changed: 300 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,310 @@
2525
*-------------------------------------------------------------------------
2626
*/
2727

28+
#include "pax_gbench.h"
29+
30+
#include "comm/cbdb_api.h"
31+
2832
#include <benchmark/benchmark.h>
2933

30-
static void example_benchmark(benchmark::State &state) {
34+
#include <functional>
35+
#include <memory>
36+
#include <vector>
37+
38+
#include "access/paxc_rel_options.h"
39+
#include "comm/cbdb_wrappers.h"
40+
#include "cpp-stub/src/stub.h"
41+
#include "storage/micro_partition_iterator.h"
42+
#include "storage/pax.h"
43+
#include "storage/strategy.h"
44+
45+
namespace pax::bench {
46+
47+
// Create memory context for benchmark
48+
void CreateMemoryContext() {
49+
MemoryContext test_memory_context = AllocSetContextCreate(
50+
(MemoryContext)NULL, "TestMemoryContext", 80 * 1024 * 1024,
51+
80 * 1024 * 1024, 80 * 1024 * 1024);
52+
MemoryContextSwitchTo(test_memory_context);
53+
}
54+
55+
// Global registry
56+
class BenchmarkRegistry {
57+
private:
58+
std::vector<InitFunction> init_functions_;
59+
std::vector<CleanupFunction> cleanup_functions_;
60+
bool initialized_ = false;
61+
62+
public:
63+
void RegisterInitFunction(InitFunction func) {
64+
init_functions_.push_back(func);
65+
}
66+
67+
void RegisterCleanupFunction(CleanupFunction func) {
68+
cleanup_functions_.push_back(func);
69+
}
70+
71+
void RunAllInitFunctions() {
72+
if (initialized_) return;
73+
74+
printf("Running PAX Benchmark Suite...\n");
75+
printf("Initializing all benchmark modules...\n\n");
76+
77+
for (const auto &func : init_functions_) {
78+
func();
79+
}
80+
initialized_ = true;
81+
}
82+
83+
void RunAllCleanupFunctions() {
84+
if (!initialized_) return;
85+
86+
printf("\nCleaning up all benchmark modules...\n");
87+
88+
// Cleanup functions executed in reverse order
89+
for (auto it = cleanup_functions_.rbegin(); it != cleanup_functions_.rend();
90+
++it) {
91+
(*it)();
92+
}
93+
initialized_ = false;
94+
}
95+
};
96+
97+
// Global registry access function
98+
BenchmarkRegistry &GetBenchmarkRegistry() {
99+
static BenchmarkRegistry instance;
100+
return instance;
101+
}
102+
103+
// Registration functions
104+
// Adds `func` to the global registry's list of init hooks.
void RegisterBenchmarkInit(InitFunction func) {
  BenchmarkRegistry &registry = GetBenchmarkRegistry();
  registry.RegisterInitFunction(func);
}
107+
108+
// Adds `func` to the global registry's list of cleanup hooks.
void RegisterBenchmarkCleanup(CleanupFunction func) {
  BenchmarkRegistry &registry = GetBenchmarkRegistry();
  registry.RegisterCleanupFunction(func);
}
111+
112+
// Global Mock functions for benchmark framework
113+
// Replacement installed over MinMaxGetPgStrategyProcinfo by
// GlobalBenchmarkInit(): ignores every argument and always returns false.
bool MockMinMaxGetStrategyProcinfo(Oid, Oid, Oid *, FmgrInfo *,
                                   StrategyNumber) {
  constexpr bool kMockResult = false;
  return kMockResult;
}
117+
118+
// Returns 0, 1, 2, ... on successive calls, regardless of the Oid passed in.
// The counter is a function-local static shared by all callers.
int32 MockGetFastSequences(Oid) {
  static int32 next_id = 0;
  const int32 assigned_id = next_id;
  ++next_id;
  return assigned_id;
}
122+
123+
void MockInsertMicroPartitionPlaceHolder(Oid, int) {}
124+
void MockDeleteMicroPartitionEntry(Oid, Snapshot, int) {}
125+
void MockExecStoreVirtualTuple(TupleTableSlot *) {}
126+
127+
// Replacement for cbdb::BuildPaxDirectoryPath used by the benchmarks: every
// relation/backend pair maps to the same relative directory, "./bench_data".
//
// Both parameters are unused, so they are left unnamed (as in the other mock
// functions in this file) to avoid unused-parameter warnings.
std::string MockBuildPaxDirectoryPath(RelFileNode /*rnode*/,
                                      BackendId /*backend_id*/) {
  // Create a simple file path for benchmarks
  return std::string("./bench_data");
}
131+
132+
// Always returns an empty list of column indexes, whatever relation is given.
std::vector<int> MockGetMinMaxColumnIndexes(Relation) {
  std::vector<int> no_columns;
  return no_columns;
}
135+
136+
// Always returns an empty list of column indexes, whatever relation is given.
std::vector<int> MockBloomFilterColumnIndexes(Relation) {
  std::vector<int> no_columns;
  return no_columns;
}
139+
140+
// Returns one (ColumnEncoding_Kind_NO_ENCODED, 0) entry per column.
//
// The column count comes from relation->rd_att->natts when both `relation`
// and its `rd_att` are non-null; otherwise a benchmark default of 10 is used.
std::vector<std::tuple<ColumnEncoding_Kind, int>> MockGetRelEncodingOptions(
    Relation relation) {
  const bool has_tuple_desc =
      relation != nullptr && relation->rd_att != nullptr;
  const int column_count = has_tuple_desc ? relation->rd_att->natts : 10;

  std::vector<std::tuple<ColumnEncoding_Kind, int>> options;
  for (int col = 0; col < column_count; ++col) {
    // Construct the tuple in place from its two elements.
    options.emplace_back(ColumnEncoding_Kind_NO_ENCODED, 0);
  }
  return options;
}
158+
159+
// Mock TupleDescInitEntry that doesn't rely on SYSCACHE
160+
void MockTupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber,
161+
const char *attributeName, Oid oidtypeid,
162+
int32 typmod, int attdim) {
163+
// Basic validation
164+
if (attributeNumber < 1 || attributeNumber > desc->natts) {
165+
return;
166+
}
167+
168+
Form_pg_attribute att = TupleDescAttr(desc, attributeNumber - 1);
169+
170+
// Set basic attribute properties
171+
namestrcpy(&(att->attname), attributeName);
172+
att->atttypid = oidtypeid;
173+
att->atttypmod = typmod;
174+
att->attndims = attdim;
175+
att->attnum = attributeNumber;
176+
att->attnotnull = false;
177+
att->atthasdef = false;
178+
att->attidentity = '\0';
179+
att->attgenerated = '\0';
180+
att->attisdropped = false;
181+
att->attislocal = true;
182+
att->attinhcount = 0;
183+
att->attcollation = InvalidOid;
184+
185+
// Set type-specific properties based on OID (hardcoded for common types)
186+
switch (oidtypeid) {
187+
case INT2OID: // smallint
188+
att->attlen = 2;
189+
att->attalign = 's';
190+
att->attstorage = 'p';
191+
att->attbyval = true;
192+
break;
193+
case INT4OID: // integer
194+
att->attlen = 4;
195+
att->attalign = 'i';
196+
att->attstorage = TYPSTORAGE_PLAIN;
197+
att->attbyval = true;
198+
break;
199+
case INT8OID: // bigint
200+
att->attlen = 8;
201+
att->attalign = 'd';
202+
att->attstorage = TYPSTORAGE_PLAIN;
203+
att->attbyval = FLOAT8PASSBYVAL;
204+
break;
205+
case FLOAT8OID: // double precision
206+
att->attlen = 8;
207+
att->attalign = 'd';
208+
att->attstorage = 'p';
209+
att->attbyval = FLOAT8PASSBYVAL;
210+
break;
211+
case BOOLOID: // boolean
212+
att->attlen = 1;
213+
att->attalign = 'c';
214+
att->attstorage = 'p';
215+
att->attbyval = true;
216+
break;
217+
case TEXTOID: // text
218+
att->attlen = -1;
219+
att->attalign = 'i';
220+
att->attstorage = TYPSTORAGE_PLAIN;
221+
att->attbyval = false;
222+
att->attcollation = DEFAULT_COLLATION_OID;
223+
break;
224+
case NUMERICOID: // numeric
225+
att->attlen = -1;
226+
att->attalign = TYPALIGN_INT;
227+
att->attstorage = TYPSTORAGE_PLAIN;
228+
att->attbyval = false;
229+
break;
230+
case TIMESTAMPOID: // timestamp
231+
att->attlen = 8;
232+
att->attalign = 'd';
233+
att->attstorage = TYPSTORAGE_PLAIN;
234+
att->attbyval = FLOAT8PASSBYVAL;
235+
break;
236+
default:
237+
// Default values for unknown types
238+
att->attlen = -1;
239+
att->attalign = 'i';
240+
att->attstorage = 'p';
241+
att->attbyval = false;
242+
break;
243+
}
244+
}
245+
246+
// Global initialization function for general benchmark framework
247+
void GlobalBenchmarkInit() {
248+
static bool global_initialized = false;
249+
if (global_initialized) return;
250+
251+
printf("Initializing PAX benchmark framework...\n");
252+
253+
// Initialize memory context
254+
MemoryContextInit();
255+
256+
// Setup global Mock functions
257+
static std::unique_ptr<Stub> stub_global = std::make_unique<Stub>();
258+
259+
stub_global->set(MinMaxGetPgStrategyProcinfo, MockMinMaxGetStrategyProcinfo);
260+
stub_global->set(CPaxGetFastSequences, MockGetFastSequences);
261+
stub_global->set(cbdb::BuildPaxDirectoryPath, MockBuildPaxDirectoryPath);
262+
stub_global->set(cbdb::InsertMicroPartitionPlaceHolder,
263+
MockInsertMicroPartitionPlaceHolder);
264+
stub_global->set(cbdb::DeleteMicroPartitionEntry,
265+
MockDeleteMicroPartitionEntry);
266+
stub_global->set(cbdb::GetMinMaxColumnIndexes, MockGetMinMaxColumnIndexes);
267+
stub_global->set(cbdb::GetBloomFilterColumnIndexes,
268+
MockBloomFilterColumnIndexes);
269+
stub_global->set(cbdb::GetRelEncodingOptions, MockGetRelEncodingOptions);
270+
stub_global->set(ExecStoreVirtualTuple, MockExecStoreVirtualTuple);
271+
stub_global->set(TupleDescInitEntry, MockTupleDescInitEntry);
272+
273+
// Create basic test directory
274+
system("mkdir -p ./bench_data");
275+
276+
global_initialized = true;
277+
printf("PAX benchmark framework initialized.\n");
278+
}
279+
280+
// Global cleanup function for general benchmark framework
281+
void GlobalBenchmarkCleanup() {
282+
printf("Cleaning up PAX benchmark framework...\n");
283+
284+
// Clean up test directory
285+
// system("rm -rf ./bench_data");
286+
287+
// Reset memory context
288+
if (TopMemoryContext) {
289+
MemoryContextReset(TopMemoryContext);
290+
}
291+
292+
printf("PAX benchmark framework cleaned up.\n");
293+
}
294+
295+
// Example benchmark test
296+
static void example_benchmark(::benchmark::State &state) {
31297
for (auto _ : state) {
298+
// Empty example test
32299
}
33300
}
34301
BENCHMARK(example_benchmark);
35302

36-
BENCHMARK_MAIN();
303+
}  // namespace pax::bench
304+
305+
// Global cleanup function (C-style for atexit)
306+
static void cleanup_all() {
307+
pax::bench::GetBenchmarkRegistry().RunAllCleanupFunctions();
308+
pax::bench::GlobalBenchmarkCleanup();
309+
}
310+
311+
// Main entry function
312+
int main(int argc, char **argv) {
313+
// Register global cleanup function
314+
std::atexit(cleanup_all);
315+
316+
// Global initialization
317+
pax::bench::GlobalBenchmarkInit();
318+
319+
// Run all registered initialization functions
320+
pax::bench::GetBenchmarkRegistry().RunAllInitFunctions();
321+
322+
// Initialize benchmark framework
323+
::benchmark::Initialize(&argc, argv);
324+
if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
325+
326+
printf("\n=== Starting PAX Benchmark Suite ===\n");
327+
printf("Use --benchmark_filter=<pattern> to run specific tests\n");
328+
printf("Use --benchmark_list_tests to see all available tests\n\n");
329+
330+
// Run benchmark
331+
::benchmark::RunSpecifiedBenchmarks();
332+
333+
return 0;
334+
}

0 commit comments

Comments
 (0)