@@ -66,14 +66,6 @@ proc toRocksDb*(
   tableOpts.dataBlockIndexType = DataBlockIndexType.binarySearchAndHash
   tableOpts.dataBlockHashRatio = 0.75
 
-  # A smaller block size reduces read amplification at the expense of larger
-  # indices - there should also be some alignment with on-disk blocks so that we
-  # typically perform reads consistent with the disk block size. It's hard
-  # to pick a reasonable number here, but assuming compression to about 2/3,
-  # we should use about ~6k blocks to fit in an ssd block - more benchmarks
-  # needed!
-  tableOpts.blockSize = 6000
-
   let cfOpts = defaultColFamilyOptions(autoClose = true)
 
   cfOpts.blockBasedTableFactory = tableOpts
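With the explicit `tableOpts.blockSize = 6000` removed, the table falls back to RocksDB's default block size (4 KiB upstream). If later benchmarks show the default is a poor fit for this workload, the tuning can be reinstated along the lines of the sketch below. This is a sketch only, not part of the change: it assumes nim-rocksdb's `defaultTableOptions(autoClose = true)` constructor, which is used just above the hunk shown, next to the `defaultColFamilyOptions` call that is visible.

```nim
import rocksdb

# Sketch only, assuming `defaultTableOptions` from nim-rocksdb; the setters
# below all appear in the diff above.
let tableOpts = defaultTableOptions(autoClose = true)
tableOpts.dataBlockIndexType = DataBlockIndexType.binarySearchAndHash
tableOpts.dataBlockHashRatio = 0.75
# Smaller blocks reduce read amplification at the expense of larger indices;
# ~6 KB assumes roughly 2/3 compression so a compressed block lands near an
# SSD block, per the removed comment above.
tableOpts.blockSize = 6000

let cfOpts = defaultColFamilyOptions(autoClose = true)
cfOpts.blockBasedTableFactory = tableOpts
```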
@@ -100,24 +92,10 @@ proc toRocksDb*(
   # Compared to LZ4 that was tested earlier, the default ZSTD config results
   # in 10% less space and similar or slightly better performance in some
   # simple tests around mainnet block 14M.
-  #
-  # sst_dump --file=005420.sst --command=recompress --set_block_size=6000 --compression_types=kZSTD --compression_level_from=-3 --compression_level_to=3 --compression_max_dict_bytes=16484 --compression_zstd_max_train_bytes=1638400
-  # Compression level: -3 Size: 2497674402 Blocks: 522975 Time Taken: 29957380
-  # Compression level: -1 Size: 2271282060 Blocks: 522975 Time Taken: 34565174
-  # Compression level: 1 Size: 2260978713 Blocks: 522975 Time Taken: 38725150
-  # Compression level: 3 Size: 2241970102 Blocks: 522975 Time Taken: 53415641
-
-  # Based on the above, -1 and -1 would offer similarly reasonable performance
-  # while -3 and 3 each feel like the cost/benefit is worse, in either direction
-
+  # TODO evaluate zstd dictionary compression
+  # https://github.com/facebook/rocksdb/wiki/Dictionary-Compression
   cfOpts.bottommostCompression = Compression.zstdCompression
 
-  # 16kb dictionary size per rocksdb recommendation:
-  # https://rocksdb.org/blog/2021/05/31/dictionary-compression.html
-  cfOpts.setBottommostCompressionOptions(level = 1, maxDictBytes = 16384)
-  cfOpts.bottommostCompressionOptionsZstdMaxTrainBytes = 16384 * 100
-  cfOpts.bottommostCompressionOptionsUseZstdDictTrainer = false
-
   # TODO In the AriVtx table, we don't do lookups that are expected to result
   # in misses thus we could avoid the filter cost - this does not apply to
   # other tables since their API admit queries that might result in
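If the dictionary-compression TODO is picked up again, the removed settings could be restored for evaluation along the lines of the sketch below. This is a sketch only: it assumes the bottommost-compression setters removed in this change are still available in nim-rocksdb, and that the column family options come from `toRocksDb`. Level 1 matches the removed code and the sst_dump comparison above; the 16 KiB dictionary with roughly 100x that in training data follows the RocksDB recommendation cited in the removed comment.

```nim
import rocksdb

# Sketch only, reusing the option setters removed in this change.
proc enableZstdDictCompression*(cfOpts: auto) =
  cfOpts.bottommostCompression = Compression.zstdCompression
  # 16 KiB dictionary per the RocksDB dictionary-compression recommendation;
  # level 1 performed close to -1 in the removed sst_dump comparison.
  cfOpts.setBottommostCompressionOptions(level = 1, maxDictBytes = 16384)
  # Feed the trainer roughly 100x the dictionary size in sample data.
  cfOpts.bottommostCompressionOptionsZstdMaxTrainBytes = 16384 * 100
  # false selects zstd's cheaper "finalize dictionary" path instead of the
  # full trainer, as in the removed code.
  cfOpts.bottommostCompressionOptionsUseZstdDictTrainer = false
```

Whether a dictionary actually helps on this data is exactly what the TODO asks to measure, for example by re-running the sst_dump recompression comparison with and without `compression_max_dict_bytes`.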