@@ -44,18 +44,25 @@ class PreprocessorInterface : public VecsimBaseObject {
4444template <typename DataType>
4545class CosinePreprocessor : public PreprocessorInterface {
4646public:
47- // This preprocessor assumes storage_blob and query_blob
48- // both are preprocessed in the same way, and yield a blob in the same size .
47+ // This preprocessor requires that storage_blob and query_blob have identical memory sizes
48+ // both before processing (as input) and after preprocessing completes .
4949 CosinePreprocessor (std::shared_ptr<VecSimAllocator> allocator, size_t dim,
5050 size_t processed_bytes_count)
5151 : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
5252 dim (dim), processed_bytes_count(processed_bytes_count) {}
5353
54- // If a blob (storage_blob or query_blob) is not nullptr, it means a previous preprocessor
55- // already allocated and processed it. So, we process it inplace. If it's null, we need to
56- // allocate memory for it and copy the original_blob to it.
5754 void preprocess (const void *original_blob, void *&storage_blob, void *&query_blob,
5855 size_t &input_blob_size, unsigned char alignment) const override {
56+ // This assert verifies that if a blob was allocated by a previous preprocessor, its
57+ // size matches our expected processed size. Therefore, it is safe to skip re-allocation and
58+ // process it inplace. Supporting dynamic resizing would require additional size checks (if
59+ // statements) and memory management logic, which could impact performance. Currently, no
60+ // code path requires this capability. If resizing becomes necessary in the future, remove
61+ // the assertions and implement appropriate allocation handling with performance
62+ // considerations.
63+ assert (storage_blob == nullptr || input_blob_size == processed_bytes_count);
64+ assert (query_blob == nullptr || input_blob_size == processed_bytes_count);
65+
5966 // Case 1: Blobs are different (one might be null, or both are allocated and processed
6067 // separately).
6168 if (storage_blob != query_blob) {
@@ -87,6 +94,9 @@ class CosinePreprocessor : public PreprocessorInterface {
8794
8895 void preprocessForStorage (const void *original_blob, void *&blob,
8996 size_t &input_blob_size) const override {
97+ // see assert docs in preprocess
98+ assert (blob == nullptr || input_blob_size == processed_bytes_count);
99+
90100 if (blob == nullptr ) {
91101 blob = this ->allocator ->allocate (processed_bytes_count);
92102 memcpy (blob, original_blob, input_blob_size);
@@ -97,6 +107,8 @@ class CosinePreprocessor : public PreprocessorInterface {
97107
98108 void preprocessQuery (const void *original_blob, void *&blob, size_t &input_blob_size,
99109 unsigned char alignment) const override {
110+ // see assert docs in preprocess
111+ assert (blob == nullptr || input_blob_size == processed_bytes_count);
100112 if (blob == nullptr ) {
101113 blob = this ->allocator ->allocate_aligned (processed_bytes_count, alignment);
102114 memcpy (blob, original_blob, input_blob_size);
0 commit comments