55 * It supports both .raw files (no header) and .fbin files (with header).
66 *
77 * Usage:
8- * ./hnsw_disk_serializer <input_file> <output_name> <dim> <metric> <type> [M] [efConstruction] [threads] [diskWriteBatchThreshold]
8+ * ./hnsw_disk_serializer <input_file> <output_name> <dim> <metric> <type> [M] [efConstruction] [threads] [diskWriteBatchThreshold] [cacheMaxEntriesPerSegment]
99 *
1010 * Arguments:
1111 * input_file - Binary file containing vectors (.raw or .fbin)
1919 * efConstruction - HNSW efConstruction parameter (default: 512)
2020 * threads - Number of threads for parallel indexing (default: 4, use 0 for single-threaded)
2121 * diskWriteBatchThreshold - Threshold for disk write batching (default: 1, larger = fewer disk writes)
22+ * cacheMaxEntriesPerSegment - Max cache entries per segment (default: 10000, 0 = unlimited)
2223 *
2324 * Examples:
2425 * # Using .raw file (dimension required)
@@ -268,7 +269,7 @@ void saveIndexByType(VecSimIndex *index, const std::string &output_file) {
268269
269270int main (int argc, char *argv[]) {
270271 if (argc < 6 ) {
271- std::cerr << " Usage: " << argv[0 ] << " <input_file> <output_name> <dim> <metric> <type> [M] [efConstruction] [threads] [diskWriteBatchThreshold]\n " ;
272+ std::cerr << " Usage: " << argv[0 ] << " <input_file> <output_name> <dim> <metric> <type> [M] [efConstruction] [threads] [diskWriteBatchThreshold] [cacheMaxEntriesPerSegment] \n " ;
272273 std::cerr << " \n Arguments:\n " ;
273274 std::cerr << " input_file - Binary file (.raw or .fbin)\n " ;
274275 std::cerr << " output_name - Base name for output files\n " ;
@@ -279,6 +280,7 @@ int main(int argc, char *argv[]) {
279280 std::cerr << " efConstruction - HNSW efConstruction parameter (default: 512)\n " ;
280281 std::cerr << " threads - Number of threads for parallel indexing (default: 4, use 0 for single-threaded)\n " ;
281282 std::cerr << " diskWriteBatchThreshold - Threshold for disk write batching (default: 1)\n " ;
283+ std::cerr << " cacheMaxEntriesPerSegment - Max cache entries per segment (default: 10000, 0 = unlimited)\n " ;
282284 return 1 ;
283285 }
284286
@@ -291,6 +293,7 @@ int main(int argc, char *argv[]) {
291293 size_t efConstruction = (argc > 7 ) ? std::stoull (argv[7 ]) : 512 ;
292294 size_t num_threads = (argc > 8 ) ? std::stoull (argv[8 ]) : 4 ;
293295 size_t disk_write_batch_threshold = (argc > 9 ) ? std::stoull (argv[9 ]) : 1 ;
296+ size_t cache_max_entries_per_segment = (argc > 10 ) ? std::stoull (argv[10 ]) : 10000 ;
294297
295298 // Check if input file exists
296299 if (!std::filesystem::exists (input_file)) {
@@ -350,6 +353,7 @@ int main(int argc, char *argv[]) {
350353 std::cout << " efConstruction: " << efConstruction << " \n " ;
351354 std::cout << " Threads: " << (num_threads > 0 ? std::to_string (num_threads) : " single-threaded" ) << " \n " ;
352355 std::cout << " DiskWriteBatchThreshold: " << disk_write_batch_threshold << " \n " ;
356+ std::cout << " CacheMaxEntriesPerSegment: " << cache_max_entries_per_segment << " (0 = unlimited)\n " ;
353357 std::cout << " Number of vectors: " << num_vectors << " \n " ;
354358 std::cout << " ==================================\n\n " ;
355359
@@ -404,6 +408,9 @@ int main(int argc, char *argv[]) {
404408 // Set disk write batch threshold for better performance
405409 // Larger batches = fewer disk writes = faster indexing
406410 disk_index->setDiskWriteBatchThreshold (disk_write_batch_threshold);
411+ // Set the per-segment cache entry limit (0 = unlimited)
412+ // Total max cache entries = cache_max_entries_per_segment * NUM_CACHE_SEGMENTS (64)
413+ disk_index->setCacheMaxEntriesPerSegment (cache_max_entries_per_segment);
407414 }
408415
409416 std::cout << " Multi-threaded indexing enabled with " << num_threads << " threads\n " ;
0 commit comments