@@ -31,6 +31,14 @@ class BM_VecSimBasics : public BM_VecSimCommon<index_type_t> {
31
31
static void Range_BF (benchmark::State &st);
32
32
static void Range_HNSW (benchmark::State &st);
33
33
34
+ // Reproduces allocation/deallocation oscillation issue at block size boundaries.
35
+ // Sets up index at blockSize+1 capacity, then repeatedly deletes and re-adds the same vector,
36
+ // triggering constant grow-shrink cycles.
37
+ // This behavior was fixed by PR #753 with a conservative resize strategy that only
38
+ // shrinks containers when there are 2+ free blocks, preventing oscillation cycles.
39
+ // Expected: High allocation overhead before fix, stable performance after fix.
40
+ static void UpdateAtBlockSize (benchmark::State &st);
41
+
34
42
private:
35
43
// Vectors of vector to store deleted labels' data.
36
44
using LabelData = std::vector<std::vector<data_t >>;
@@ -64,7 +72,9 @@ void BM_VecSimBasics<index_type_t>::AddLabel(benchmark::State &st) {
64
72
// For tiered index, wait for all threads to finish indexing
65
73
BM_VecSimGeneral::mock_thread_pool.thread_pool_wait ();
66
74
67
- st.counters [" memory_per_vector" ] = (double )memory_delta / (double )added_vec_count;
75
+ st.counters [" memory_per_vector" ] =
76
+ benchmark::Counter ((double )memory_delta / (double )added_vec_count,
77
+ benchmark::Counter::kDefaults , benchmark::Counter::OneK::kIs1024 );
68
78
st.counters [" vectors_per_label" ] = vec_per_label;
69
79
70
80
assert (VecSimIndex_IndexSize (index) == N_VECTORS + added_vec_count);
@@ -110,7 +120,9 @@ void BM_VecSimBasics<index_type_t>::AddLabel_AsyncIngest(benchmark::State &st) {
110
120
}
111
121
112
122
size_t memory_delta = (INDICES[st.range (0 )])->getAllocationSize () - memory_before;
113
- st.counters [" memory_per_vector" ] = (double )memory_delta / (double )added_vec_count;
123
+ st.counters [" memory_per_vector" ] =
124
+ benchmark::Counter ((double )memory_delta / (double )added_vec_count,
125
+ benchmark::Counter::kDefaults , benchmark::Counter::OneK::kIs1024 );
114
126
st.counters [" vectors_per_label" ] = vec_per_label;
115
127
st.counters [" num_threads" ] = BM_VecSimGeneral::mock_thread_pool.thread_pool_size ;
116
128
@@ -159,7 +171,9 @@ void BM_VecSimBasics<index_type_t>::DeleteLabel(algo_t *index, benchmark::State
159
171
if (VecSimIndex_BasicInfo (index).algo == VecSimAlgo_TIERED) {
160
172
reinterpret_cast <TieredHNSWIndex<data_t , data_t > *>(index)->executeReadySwapJobs ();
161
173
}
162
- st.counters [" memory_per_vector" ] = memory_delta / (double )removed_vectors_count;
174
+ st.counters [" memory_per_vector" ] =
175
+ benchmark::Counter ((double )memory_delta / (double )removed_vectors_count,
176
+ benchmark::Counter::kDefaults , benchmark::Counter::OneK::kIs1024 );
163
177
164
178
// Restore index state.
165
179
// For each label in removed_labels_data
@@ -207,7 +221,10 @@ void BM_VecSimBasics<index_type_t>::DeleteLabel_AsyncRepair(benchmark::State &st
207
221
// Avg. memory delta per vector equals the total memory delta divided by the number
208
222
// of deleted vectors.
209
223
int memory_delta = tiered_index->getAllocationSize () - memory_before;
210
- st.counters [" memory_per_vector" ] = memory_delta / (double )removed_vectors_count;
224
+
225
+ st.counters [" memory_per_vector" ] =
226
+ benchmark::Counter ((double )memory_delta / (double )removed_vectors_count,
227
+ benchmark::Counter::kDefaults , benchmark::Counter::OneK::kIs1024 );
211
228
st.counters [" num_threads" ] = (double )BM_VecSimGeneral::mock_thread_pool.thread_pool_size ;
212
229
st.counters [" num_zombies" ] = tiered_index->idToSwapJob .size ();
213
230
@@ -279,6 +296,69 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
279
296
st.counters [" Recall" ] = (float )total_res / total_res_bf;
280
297
}
281
298
299
+ template <typename index_type_t >
300
+ void BM_VecSimBasics<index_type_t >::UpdateAtBlockSize(benchmark::State &st) {
301
+ auto index = INDICES[st.range (0 )];
302
+ size_t initial_index_size = VecSimIndex_IndexSize (index);
303
+ // Calculate vectors needed to reach next block boundary
304
+ size_t vecs_to_blocksize =
305
+ BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size);
306
+ assert (vecs_to_blocksize < BM_VecSimGeneral::block_size);
307
+ labelType initial_label_count = index->indexLabelCount ();
308
+ labelType curr_label = initial_label_count;
309
+
310
+ // Set up index at blockSize+1 to trigger oscillation issue
311
+ // Make sure we have enough queries to add a new label.
312
+ assert (N_QUERIES > BM_VecSimGeneral::block_size);
313
+ size_t overhead = 1 ;
314
+ size_t added_vec_count = vecs_to_blocksize + overhead;
315
+ for (size_t i = 0 ; i < added_vec_count; ++i) {
316
+ VecSimIndex_AddVector (index, QUERIES[added_vec_count % N_QUERIES].data (), curr_label++);
317
+ }
318
+ // For tiered index, wait for all threads to finish indexing
319
+ BM_VecSimGeneral::mock_thread_pool.thread_pool_wait ();
320
+ assert (VecSimIndex_IndexSize (index) % BM_VecSimGeneral::block_size == overhead);
321
+ assert (VecSimIndex_IndexSize (index) == N_VECTORS + added_vec_count);
322
+
323
+ std::cout << " Added " << added_vec_count << " vectors to reach block size boundary."
324
+ << std::endl;
325
+ std::cout << " Index size is now " << VecSimIndex_IndexSize (index) << std::endl;
326
+ std::cout << " Last label is " << curr_label - 1 << std::endl;
327
+
328
+ // Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles
329
+ labelType label_to_update = curr_label - 1 ;
330
+ size_t index_cap = index->indexCapacity ();
331
+ for (auto _ : st) {
332
+ // Remove the vector directly from hnsw
333
+ size_t ret = VecSimIndex_DeleteVector (
334
+ INDICES[st.range (0 ) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range (0 )],
335
+ label_to_update);
336
+ assert (ret == 1 );
337
+ assert (index->indexCapacity () == index_cap - BM_VecSimGeneral::block_size);
338
+ // Capacity should shrink by one block after deletion
339
+ ret = VecSimIndex_AddVector (index, QUERIES[(added_vec_count - 1 ) % N_QUERIES].data (),
340
+ label_to_update);
341
+ assert (ret == 1 );
342
+ BM_VecSimGeneral::mock_thread_pool.thread_pool_wait ();
343
+ assert (VecSimIndex_IndexSize (
344
+ INDICES[st.range (0 ) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range (0 )]) ==
345
+ N_VECTORS + added_vec_count);
346
+ // Capacity should grow back to original size after addition
347
+ assert (index->indexCapacity () == index_cap);
348
+ }
349
+ assert (VecSimIndex_IndexSize (index) == N_VECTORS + added_vec_count);
350
+
351
+ // Clean-up all the new vectors to restore the index size to its original value.
352
+
353
+ size_t new_label_count = index->indexLabelCount ();
354
+ for (size_t label = initial_label_count; label < new_label_count; label++) {
355
+ // If index is tiered HNSW, remove directly from the underline HNSW.
356
+ VecSimIndex_DeleteVector (
357
+ INDICES[st.range (0 ) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range (0 )], label);
358
+ }
359
+ assert (VecSimIndex_IndexSize (index) == N_VECTORS);
360
+ }
361
+
282
362
// Shared registration settings: report times in milliseconds and run a fixed number of
// iterations equal to BM_VecSimGeneral::block_size.
#define UNIT_AND_ITERATIONS Unit (benchmark::kMillisecond )->Iterations(BM_VecSimGeneral::block_size)
283
363
284
364
// The actual radius will be the given arg divided by 100, since arg must be an integer.
@@ -324,3 +404,8 @@ void BM_VecSimBasics<index_type_t>::Range_HNSW(benchmark::State &st) {
324
404
}
325
405
// Registers the BM_VecSimBasics fixture benchmark BM_FUNC with the UNIT_AND_ITERATIONS settings.
#define REGISTER_DeleteLabel (BM_FUNC ) \
326
406
BENCHMARK_REGISTER_F (BM_VecSimBasics, BM_FUNC)->UNIT_AND_ITERATIONS
407
+
408
// Registers the BM_VecSimBasics fixture benchmark BM_FUNC with the UNIT_AND_ITERATIONS settings,
// passes VecSimAlgo as its single benchmark argument, and names that argument after the
// stringified VecSimAlgo token.
+ #define REGISTER_UpdateAtBlockSize (BM_FUNC, VecSimAlgo ) \
409
+ BENCHMARK_REGISTER_F (BM_VecSimBasics, BM_FUNC) \
410
+ ->UNIT_AND_ITERATIONS->Arg(VecSimAlgo) \
411
+ ->ArgName(#VecSimAlgo)
0 commit comments