Skip to content

Commit c10d1dc

Browse files
authored
[0.6] [MOD-10559] Decouple the shrinking and growing logic of large containers in Flat and HNSW (#783)
Enhance BruteForce index tests for resizing and alignment - Refactor `resize_and_align_index` tests in both `test_bruteforce.cpp` and `test_bruteforce_multi.cpp` to improve clarity and maintainability. - Introduce helper functions to verify index size and capacity, reducing code duplication. - Add comprehensive checks for index size, capacity, and label counts during vector addition and deletion. - Implement tests to ensure no oscillation in index size and capacity during repeated add/delete cycles. - Address edge cases for initial capacity and resizing behavior, ensuring proper alignment with block sizes.
1 parent 71bd103 commit c10d1dc

File tree

6 files changed

+623
-191
lines changed

6 files changed

+623
-191
lines changed

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ class BruteForceIndex : public VecSimIndexAbstract<DistType> {
5454
inline vecsim_stl::vector<VectorBlock *> getVectorBlocks() const { return vectorBlocks; }
5555
virtual ~BruteForceIndex();
5656

57+
#ifdef BUILD_TESTS
58+
size_t getStoredVectorsCount() const {
59+
size_t actual_stored_vec = 0;
60+
for (auto &block : vectorBlocks) {
61+
actual_stored_vec += block->getLength();
62+
}
63+
64+
return actual_stored_vec;
65+
}
66+
#endif
67+
5768
protected:
5869
// Private internal function that implements generic single vector insertion.
5970
virtual int appendVector(const void *vector_data, labelType label);
@@ -142,10 +153,14 @@ int BruteForceIndex<DataType, DistType>::appendVector(const void *vector_data, l
142153
size_t idToLabelMapping_size = this->idToLabelMapping.size();
143154

144155
if (id >= idToLabelMapping_size) {
156+
assert(indexCapacity() == idToLabelMapping.capacity());
157+
assert(idToLabelMapping.size() == idToLabelMapping.capacity());
145158
size_t last_block_vectors_count = id % this->blockSize;
146-
this->idToLabelMapping.resize(
147-
idToLabelMapping_size + this->blockSize - last_block_vectors_count, 0);
159+
size_t new_size = idToLabelMapping_size + this->blockSize - last_block_vectors_count;
160+
assert(new_size % this->blockSize == 0);
161+
this->idToLabelMapping.resize(new_size, 0);
148162
this->idToLabelMapping.shrink_to_fit();
163+
assert(idToLabelMapping.size() == idToLabelMapping.capacity());
149164
}
150165

151166
// add label to idToLabelMapping
@@ -196,12 +211,20 @@ int BruteForceIndex<DataType, DistType>::removeVector(idType id_to_delete) {
196211

197212
// Resize and align the idToLabelMapping.
198213
size_t idToLabel_size = idToLabelMapping.size();
199-
// If the new size is smaller by at least one block comparing to the idToLabelMapping
214+
// If the new size is smaller by at least two blocks compared to the idToLabelMapping size,
215+
// or if the new size is 0 and the capacity is at least one block,
200216
// align to be a multiple of blockSize and shrink by one block.
201-
if (this->count + this->blockSize <= idToLabel_size) {
217+
if ((this->count + 2 * this->blockSize <= idToLabel_size) ||
218+
// Handle last block
219+
(this->count == 0 && idToLabel_size >= this->blockSize)) {
202220
size_t vector_to_align_count = idToLabel_size % this->blockSize;
203-
this->idToLabelMapping.resize(idToLabel_size - this->blockSize - vector_to_align_count);
221+
size_t new_size = idToLabel_size - this->blockSize - vector_to_align_count;
222+
assert(new_size >= this->count);
223+
assert(new_size % this->blockSize == 0);
224+
assert(idToLabelMapping.size() == idToLabelMapping.capacity());
225+
this->idToLabelMapping.resize(new_size);
204226
this->idToLabelMapping.shrink_to_fit();
227+
assert(idToLabelMapping.size() == idToLabelMapping.capacity());
205228
}
206229
}
207230

src/VecSim/algorithms/brute_force/brute_force_friend_tests.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_vector_update_test_Test)
1111
INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_Test)
1212
INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_largeInitialCapacity_Test)
13+
INDEX_TEST_FRIEND_CLASS(BruteForceTest_resize_and_align_index_smallInitialCapacity_Test)
1314
INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_empty_index_Test)
1415
INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_reindexing_same_vector_Test)
1516
INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_reindexing_same_vector_different_id_Test)
1617
INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_delete_swap_block_Test)
1718
INDEX_TEST_FRIEND_CLASS(BruteForceTest_test_dynamic_bf_info_iterator_Test)
1819
INDEX_TEST_FRIEND_CLASS(BruteForceTest_brute_force_zero_minimal_capacity_Test)
1920
INDEX_TEST_FRIEND_CLASS(BruteForceTest_preferAdHocOptimization_Test)
21+
INDEX_TEST_FRIEND_CLASS(IndexAllocatorTest_test_bf_index_block_size_1_Test)
2022
INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,13 +1151,16 @@ int HNSWIndex<DataType, DistType>::removeVector(const idType element_internal_id
11511151
--cur_element_count;
11521152
--max_id;
11531153

1154-
// If we need to free a complete block & there is a least one block between the
1155-
// capacity and the size.
1154+
// If the new size is smaller by at least two blocks compared to the capacity (max_elements_),
1155+
// or if the new size is 0 and the capacity is at least one block,
1156+
// align to be a multiple of blockSize and shrink by one block.
11561157
if (cur_element_count % this->blockSize == 0 &&
1157-
cur_element_count + this->blockSize <= max_elements_) {
1158+
((cur_element_count + 2 * this->blockSize <= max_elements_) ||
1159+
(cur_element_count == 0 && max_elements_ >= this->blockSize))) {
11581160

11591161
// Check if the capacity is aligned to block size.
11601162
size_t extra_space_to_free = max_elements_ % this->blockSize;
1163+
assert(max_elements_ - this->blockSize - extra_space_to_free >= cur_element_count);
11611164

11621165
// Remove one block from the capacity.
11631166
this->resizeIndex(max_elements_ - this->blockSize - extra_space_to_free);

0 commit comments

Comments
 (0)