@@ -14,7 +14,7 @@ use std::{
1414} ;
1515
1616use linera_base:: ensure;
17- use rocksdb:: { BlockBasedOptions , Cache , DBCompactionStyle } ;
17+ use rocksdb:: { BlockBasedOptions , Cache , DBCompactionStyle , SliceTransform } ;
1818use serde:: { Deserialize , Serialize } ;
1919use sysinfo:: { CpuRefreshKind , MemoryRefreshKind , RefreshKind , System } ;
2020use tempfile:: TempDir ;
@@ -171,20 +171,39 @@ impl RocksDbStoreExecutor {
171171 key_prefix : Vec < u8 > ,
172172 ) -> Result < Vec < Vec < u8 > > , RocksDbStoreInternalError > {
173173 check_key_size ( & key_prefix) ?;
174+
174175 let mut prefix = self . start_key . clone ( ) ;
175176 prefix. extend ( key_prefix) ;
176177 let len = prefix. len ( ) ;
177- let mut iter = self . db . raw_iterator ( ) ;
178+
179+ // Configure ReadOptions optimized for SSDs and iterator performance
180+ let mut read_opts = rocksdb:: ReadOptions :: default ( ) ;
181+ // Enable async I/O for better concurrency
182+ read_opts. set_async_io ( true ) ;
183+
184+ // Set precise upper bound to minimize key traversal
185+ let mut upper_bound = prefix. clone ( ) ;
186+ if let Some ( last_byte) = upper_bound. last_mut ( ) {
187+ if * last_byte < 255 {
188+ * last_byte += 1 ;
189+ read_opts. set_iterate_upper_bound ( upper_bound) ;
190+ }
191+ }
192+
193+ let mut iter = self . db . raw_iterator_opt ( read_opts) ;
178194 let mut keys = Vec :: new ( ) ;
195+
179196 iter. seek ( & prefix) ;
180- let mut next_key = iter. key ( ) ;
181- while let Some ( key) = next_key {
182- if !key. starts_with ( & prefix) {
197+ while iter. valid ( ) {
198+ if let Some ( key) = iter. key ( ) {
199+ if !key. starts_with ( & prefix) {
200+ break ;
201+ }
202+ keys. push ( key[ len..] . to_vec ( ) ) ;
203+ } else {
183204 break ;
184205 }
185- keys. push ( key[ len..] . to_vec ( ) ) ;
186206 iter. next ( ) ;
187- next_key = iter. key ( ) ;
188207 }
189208 Ok ( keys)
190209 }
@@ -198,20 +217,36 @@ impl RocksDbStoreExecutor {
198217 let mut prefix = self . start_key . clone ( ) ;
199218 prefix. extend ( key_prefix) ;
200219 let len = prefix. len ( ) ;
201- let mut iter = self . db . raw_iterator ( ) ;
220+
221+ // Configure ReadOptions optimized for SSDs and iterator performance
222+ let mut read_opts = rocksdb:: ReadOptions :: default ( ) ;
223+ // Enable async I/O for better concurrency
224+ read_opts. set_async_io ( true ) ;
225+
226+ let mut upper_bound = prefix. clone ( ) ;
227+ if let Some ( last_byte) = upper_bound. last_mut ( ) {
228+ if * last_byte < 255 {
229+ * last_byte += 1 ;
230+ read_opts. set_iterate_upper_bound ( upper_bound) ;
231+ }
232+ }
233+
234+ let mut iter = self . db . raw_iterator_opt ( read_opts) ;
202235 let mut key_values = Vec :: new ( ) ;
203236 iter. seek ( & prefix) ;
204- let mut next_key = iter. key ( ) ;
205- while let Some ( key) = next_key {
206- if !key. starts_with ( & prefix) {
237+ while iter. valid ( ) {
238+ if let Some ( key) = iter. key ( ) {
239+ if !key. starts_with ( & prefix) {
240+ break ;
241+ }
242+ if let Some ( value) = iter. value ( ) {
243+ let key_value = ( key[ len..] . to_vec ( ) , value. to_vec ( ) ) ;
244+ key_values. push ( key_value) ;
245+ }
246+ } else {
207247 break ;
208248 }
209- if let Some ( value) = iter. value ( ) {
210- let key_value = ( key[ len..] . to_vec ( ) , value. to_vec ( ) ) ;
211- key_values. push ( key_value) ;
212- }
213249 iter. next ( ) ;
214- next_key = iter. key ( ) ;
215250 }
216251 Ok ( key_values)
217252 }
@@ -373,8 +408,32 @@ impl RocksDbStoreInternal {
373408 total_ram / 4 ,
374409 HYPER_CLOCK_CACHE_BLOCK_SIZE ,
375410 ) ) ;
411+
412+ // Configure bloom filters for prefix iteration optimization
413+ block_options. set_bloom_filter ( 10.0 , false ) ;
414+ block_options. set_whole_key_filtering ( false ) ;
415+
416+ // 32KB blocks instead of default 4KB - reduces iterator seeks
417+ block_options. set_block_size ( 32 * 1024 ) ;
418+ // Table format version 5 (readable by RocksDB >= 6.6): switches full/partitioned filters to the faster, more accurate Bloom filter implementation
419+ block_options. set_format_version ( 5 ) ;
420+
376421 options. set_block_based_table_factory ( & block_options) ;
377422
423+ // Configure prefix extraction for bloom filter optimization
424+ // Use 8 bytes: ROOT_KEY_DOMAIN (1 byte) + BCS variant (1-2 bytes) + identifier start (4-5 bytes)
425+ let prefix_extractor = SliceTransform :: create_fixed_prefix ( 8 ) ;
426+ options. set_prefix_extractor ( prefix_extractor) ;
427+
428+ // 12.5% of memtable size for bloom filter
429+ options. set_memtable_prefix_bloom_ratio ( 0.125 ) ;
430+ // Do not build bloom filters for the bottommost LSM level; assumes most lookups hit existing keys, so misses on that level pay an extra read
431+ options. set_optimize_filters_for_hits ( true ) ;
432+ // Use memory-mapped files for faster reads
433+ options. set_allow_mmap_reads ( true ) ;
434+ // Don't use random access pattern since we do prefix scans
435+ options. set_advise_random_on_open ( false ) ;
436+
378437 let db = DB :: open ( & options, path_buf) ?;
379438 let executor = RocksDbStoreExecutor {
380439 db : Arc :: new ( db) ,
0 commit comments