@@ -77,11 +77,18 @@ int ivf_index(
7777 std::unordered_set<ids_type> deleted_ids_set (deleted_ids.begin (), deleted_ids.end ());
7878 std::vector<size_t > degrees (centroids.num_cols ());
7979 std::vector<ids_type> indices (centroids.num_cols () + 1 );
80- for ( size_t i = 0 ; i < db. num_cols (); ++i ) {
81- if ( auto iter = deleted_ids_set. find (external_ids[i]); iter == deleted_ids_set. end () ) {
80+ if (deleted_ids. empty () ) {
81+ for ( size_t i = 0 ; i < db. num_cols (); ++i ) {
8282 auto j = parts[i];
8383 ++degrees[j];
8484 }
85+ } else {
86+ for (size_t i = 0 ; i < db.num_cols (); ++i) {
87+ if (auto iter = deleted_ids_set.find (external_ids[i]); iter == deleted_ids_set.end ()) {
88+ auto j = parts[i];
89+ ++degrees[j];
90+ }
91+ }
8592 }
8693 indices[0 ] = 0 ;
8794 std::inclusive_scan (begin (degrees), end (degrees), begin (indices) + 1 );
@@ -112,8 +119,8 @@ int ivf_index(
112119 // which will group them nicely -- but a distributed parallel sort may
113120 // be difficult to implement. Even this algorithm is not trivial to
114121 // parallelize, because of the random access to the indices array.
115- for ( size_t i = 0 ; i < db. num_cols (); ++i ) {
116- if ( auto iter = deleted_ids_set. find (external_ids[i]); iter == deleted_ids_set. end () ) {
122+ if (deleted_ids. empty () ) {
123+ for ( size_t i = 0 ; i < db. num_cols (); ++i ) {
117124 size_t bin = parts[i];
118125 size_t ibin = indices[bin];
119126
@@ -125,6 +132,21 @@ int ivf_index(
125132 }
126133 ++indices[bin];
127134 }
135+ } else {
136+ for (size_t i = 0 ; i < db.num_cols (); ++i) {
137+ if (auto iter = deleted_ids_set.find (external_ids[i]); iter == deleted_ids_set.end ()) {
138+ size_t bin = parts[i];
139+ size_t ibin = indices[bin];
140+
141+ shuffled_ids[ibin] = external_ids[i];
142+
143+ assert (ibin < shuffled_db.num_cols ());
144+ for (size_t j = 0 ; j < db.num_rows (); ++j) {
145+ shuffled_db (j, ibin) = db (j, i);
146+ }
147+ ++indices[bin];
148+ }
149+ }
128150 }
129151
130152 std::shift_right (begin (indices), end (indices), 1 );
0 commit comments