Skip to content

Commit f2af502

Browse files
authored
use empty_index_set for serialize empty index (antgroup#429)
Signed-off-by: LHT129 <tianlan.lht@antgroup.com>
1 parent f35ef1a commit f2af502

File tree

7 files changed

+52
-77
lines changed

7 files changed

+52
-77
lines changed

src/algorithm/hgraph.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,26 +22,12 @@
2222

2323
#include "common.h"
2424
#include "data_cell/sparse_graph_datacell.h"
25+
#include "empty_index_binary_set.h"
2526
#include "impl/pruning_strategy.h"
2627
#include "index/hgraph_index_zparameters.h"
2728
#include "logger.h"
2829

2930
namespace vsag {
30-
static BinarySet
31-
empty_binaryset() {
32-
const std::string empty_str = "EMPTY_INDEX";
33-
size_t num_bytes = empty_str.length();
34-
std::shared_ptr<int8_t[]> bin(new int8_t[num_bytes]);
35-
memcpy(bin.get(), empty_str.c_str(), empty_str.length());
36-
Binary b{
37-
.data = bin,
38-
.size = num_bytes,
39-
};
40-
BinarySet bs;
41-
bs.Set(BLANK_INDEX, b);
42-
43-
return bs;
44-
}
4531

4632
static uint64_t
4733
next_multiple_of_power_of_two(uint64_t x, uint64_t n) {
@@ -252,7 +238,7 @@ HGraph::EstimateMemory(uint64_t num_elements) const {
252238
tl::expected<BinarySet, Error>
253239
HGraph::Serialize() const {
254240
if (GetNumElements() == 0) {
255-
return empty_binaryset();
241+
return EmptyIndexBinarySet::Make("EMPTY_HGRAPH");
256242
}
257243
SlowTaskTimer t("hgraph Serialize");
258244
size_t num_bytes = this->cal_serialize_size();

src/empty_index_binary_set.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
2+
// Copyright 2024-present the vsag project
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
#pragma once
17+
18+
#include <cstring>
19+
20+
#include "vsag/binaryset.h"
21+
#include "vsag/constants.h"
22+
23+
namespace vsag {
24+
class EmptyIndexBinarySet {
25+
public:
26+
static BinarySet
27+
Make(const std::string& name = "EMPTY_INDEX") {
28+
const std::string empty_str = name;
29+
size_t num_bytes = empty_str.length();
30+
std::shared_ptr<int8_t[]> bin(new int8_t[num_bytes]);
31+
memcpy(bin.get(), empty_str.c_str(), empty_str.length());
32+
Binary b{
33+
.data = bin,
34+
.size = num_bytes,
35+
};
36+
BinarySet bs;
37+
bs.Set(BLANK_INDEX, b);
38+
39+
return bs;
40+
}
41+
};
42+
43+
} // namespace vsag

src/index/diskann.cpp

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <utility>
2929

3030
#include "data_cell/flatten_datacell.h"
31+
#include "empty_index_binary_set.h"
3132
#include "impl/odescent_graph_builder.h"
3233
#include "io/memory_io_parameter.h"
3334
#include "quantization/fp32_quantizer_parameter.h"
@@ -619,29 +620,11 @@ DiskANN::range_search(const DatasetPtr& query,
619620
}
620621
}
621622

622-
BinarySet
623-
DiskANN::empty_binaryset() {
624-
// version 0 pairs:
625-
// - hnsw_blank: b"EMPTY_DISKANN"
626-
const std::string empty_str = "EMPTY_DISKANN";
627-
size_t num_bytes = empty_str.length();
628-
std::shared_ptr<int8_t[]> bin(new int8_t[num_bytes]);
629-
memcpy(bin.get(), empty_str.c_str(), empty_str.length());
630-
Binary b{
631-
.data = bin,
632-
.size = num_bytes,
633-
};
634-
BinarySet bs;
635-
bs.Set(BLANK_INDEX, b);
636-
637-
return bs;
638-
}
639-
640623
tl::expected<BinarySet, Error>
641624
DiskANN::serialize() const {
642625
if (status_ == IndexStatus::EMPTY) {
643626
// return a special binaryset means empty
644-
return empty_binaryset();
627+
return EmptyIndexBinarySet::Make("EMPTY_DISKANN");
645628
}
646629

647630
SlowTaskTimer t("diskann serialize");

src/index/diskann.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,6 @@ class DiskANN : public Index {
205205
tl::expected<void, Error>
206206
load_disk_index(const BinarySet& binary_set);
207207

208-
static BinarySet
209-
empty_binaryset();
210-
211208
private:
212209
std::shared_ptr<LocalFileReader> reader_;
213210
std::shared_ptr<diskann::PQFlashIndex<float, int64_t>> index_;

src/index/hnsw.cpp

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "common.h"
2828
#include "data_cell/flatten_datacell.h"
2929
#include "data_cell/graph_datacell_parameter.h"
30+
#include "empty_index_binary_set.h"
3031
#include "index/hnsw_zparameters.h"
3132
#include "io/memory_block_io_parameter.h"
3233
#include "io/memory_io_parameter.h"
@@ -421,29 +422,11 @@ HNSW::range_search(const DatasetPtr& query,
421422
}
422423
}
423424

424-
BinarySet
425-
HNSW::empty_binaryset() {
426-
// version 0 pairs:
427-
// - hnsw_blank: b"EMPTY_HNSW"
428-
const std::string empty_str = "EMPTY_HNSW";
429-
size_t num_bytes = empty_str.length();
430-
std::shared_ptr<int8_t[]> bin(new int8_t[num_bytes]);
431-
memcpy(bin.get(), empty_str.c_str(), empty_str.length());
432-
Binary b{
433-
.data = bin,
434-
.size = num_bytes,
435-
};
436-
BinarySet bs;
437-
bs.Set(BLANK_INDEX, b);
438-
439-
return bs;
440-
}
441-
442425
tl::expected<BinarySet, Error>
443426
HNSW::serialize() const {
444427
if (GetNumElements() == 0) {
445428
// return a special binaryset means empty
446-
return empty_binaryset();
429+
return EmptyIndexBinarySet::Make("EMPTY_HNSW");
447430
}
448431

449432
SlowTaskTimer t("hnsw serialize");

src/index/hnsw.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,6 @@ class HNSW : public Index {
319319
tl::expected<void, Error>
320320
merge(const std::vector<MergeUnit>& merge_units);
321321

322-
static BinarySet
323-
empty_binaryset();
324-
325322
void
326323
init_feature_list();
327324

src/index/pyramid.cpp

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,11 @@
1616
#include "pyramid.h"
1717

1818
#include "data_cell/flatten_interface.h"
19+
#include "empty_index_binary_set.h"
1920
#include "impl/odescent_graph_builder.h"
2021
#include "io/memory_io_parameter.h"
21-
namespace vsag {
22-
23-
static BinarySet
24-
empty_binaryset() {
25-
const std::string empty_str = "EMPTY_INDEX";
26-
size_t num_bytes = empty_str.length();
27-
std::shared_ptr<int8_t[]> bin(new int8_t[num_bytes]);
28-
memcpy(bin.get(), empty_str.c_str(), empty_str.length());
29-
Binary b{
30-
.data = bin,
31-
.size = num_bytes,
32-
};
33-
BinarySet bs;
34-
bs.Set(BLANK_INDEX, b);
3522

36-
return bs;
37-
}
23+
namespace vsag {
3824

3925
std::vector<std::string>
4026
split(const std::string& str, char delimiter) {
@@ -346,7 +332,7 @@ Pyramid::search_impl(const DatasetPtr& query, int64_t limit, const SearchFunc& s
346332
tl::expected<BinarySet, Error>
347333
Pyramid::Serialize() const {
348334
if (GetNumElements() == 0) {
349-
return empty_binaryset();
335+
return EmptyIndexBinarySet::Make("EMPTY_PYRAMID");
350336
}
351337
SlowTaskTimer t("Pyramid Serialize");
352338
size_t num_bytes = this->cal_serialize_size();

0 commit comments

Comments
 (0)