Skip to content

Commit 07ad5f4

Browse files
alonre24, GuyAv46
authored and committed
Additional benchmarks - memory and search (#387)
1 parent 47ab7a6 commit 07ad5f4

31 files changed

+638
-784
lines changed

src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,3 +50,4 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_preferAdHocOptimization_Test)
5050
INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_runGCAPI_Test)
5151

5252
INDEX_TEST_FRIEND_CLASS(BM_VecSimBasics)
53+
INDEX_TEST_FRIEND_CLASS(BM_VecSimCommon)

src/VecSim/index_factories/tiered_factory.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,8 @@ VecSimIndex *NewIndex(const TieredIndexParams *params, HNSWIndex<DataType, DistT
3434
.multi = params->primaryIndexParams->algoParams.hnswParams.multi,
3535
.blockSize = params->primaryIndexParams->algoParams.hnswParams.blockSize};
3636

37-
AbstractIndexInitParams abstractInitParams = {.allocator = hnsw_index->getAllocator(),
37+
std::shared_ptr<VecSimAllocator> flat_allocator = VecSimAllocator::newVecsimAllocator();
38+
AbstractIndexInitParams abstractInitParams = {.allocator = flat_allocator,
3839
.dim = bf_params.dim,
3940
.vecType = bf_params.type,
4041
.metric = bf_params.metric,

src/VecSim/vec_sim_tiered_index.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,8 @@ class VecSimTieredIndex : public VecSimIndexInterface {
9393
static VecSimWriteMode getWriteMode() { return VecSimIndexInterface::asyncWriteMode; }
9494

9595
#ifdef BUILD_TESTS
96-
inline VecSimIndexAbstract<DistType> *getFlatbufferIndex() { return this->frontendIndex; }
96+
inline VecSimIndexAbstract<DistType> *getFlatBufferIndex() { return this->frontendIndex; }
97+
inline size_t getFlatBufferLimit() { return this->flatBufferLimit; }
9798
#endif
9899

99100
private:

src/python_bindings/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,9 @@ endif()
2121

2222
add_subdirectory(${root}/src/VecSim VectorSimilarity)
2323

24-
include_directories(${root}/src)
24+
include_directories(${root}/src ${root}/tests/utils)
2525

26-
pybind11_add_module(VecSim bindings.cpp)
26+
pybind11_add_module(VecSim ../../tests/utils/mock_thread_pool.cpp bindings.cpp)
2727

2828
target_link_libraries(VecSim VectorSimilarity)
2929

src/python_bindings/bindings.cpp

Lines changed: 34 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,9 @@
1616
#include <thread>
1717
#include <VecSim/algorithms/hnsw/hnsw_single.h>
1818
#include <VecSim/algorithms/brute_force/brute_force_single.h>
19-
#include "tiered_index_mock.h"
19+
#include "mock_thread_pool.h"
2020

2121
namespace py = pybind11;
22-
using namespace tiered_index_mock;
2322

2423
// Helper function that iterates query results and wrap them in python numpy object -
2524
// a tuple of two 2D arrays: (labels, distances)
@@ -366,92 +365,64 @@ class PyHNSWLibIndex : public PyVecSimIndex {
366365
};
367366

368367
class PyTieredIndex : public PyVecSimIndex {
369-
private:
368+
protected:
369+
tieredIndexMock mock_thread_pool;
370+
370371
VecSimIndexAbstract<float> *getFlatBuffer() {
371372
return reinterpret_cast<VecSimTieredIndex<float, float> *>(this->index.get())
372-
->getFlatbufferIndex();
373+
->getFlatBufferIndex();
373374
}
374375

375-
protected:
376-
JobQueue jobQueue; // External queue that holds the jobs.
377-
IndexExtCtx jobQueueCtx; // External context to be sent to the submit callback.
378-
SubmitCB submitCb; // A callback that submits an array of jobs into a given jobQueue.
379-
size_t flatBufferLimit; // Maximum size allowed for the flat buffer. If flat buffer is full, use
380-
// in-place insertion.
381-
bool run_thread;
382-
std::bitset<MAX_POOL_SIZE> executions_status;
383-
384-
TieredIndexParams TieredIndexParams_Init() {
385-
TieredIndexParams ret = {
386-
.jobQueue = &this->jobQueue,
387-
.jobQueueCtx = &this->jobQueueCtx,
388-
.submitCb = this->submitCb,
389-
.flatBufferLimit = this->flatBufferLimit,
376+
TieredIndexParams getTieredIndexParams(size_t buffer_limit) {
377+
// Create TieredIndexParams using the mock thread pool.
378+
return TieredIndexParams{
379+
.jobQueue = &(this->mock_thread_pool.jobQ),
380+
.jobQueueCtx = this->mock_thread_pool.ctx,
381+
.submitCb = tieredIndexMock::submit_callback,
382+
.flatBufferLimit = buffer_limit,
390383
};
391-
392-
return ret;
393384
}
394385

395386
public:
396-
explicit PyTieredIndex(size_t BufferLimit = 3000000)
397-
: submitCb(submit_callback), flatBufferLimit(BufferLimit), run_thread(true) {
398-
399-
for (size_t i = 0; i < THREAD_POOL_SIZE; i++) {
400-
ThreadParams params(run_thread, executions_status, i, jobQueue);
401-
thread_pool.emplace_back(thread_main_loop, params);
402-
}
403-
}
404-
405-
virtual ~PyTieredIndex() = 0;
387+
explicit PyTieredIndex() { mock_thread_pool.init_threads(); }
406388

407389
void WaitForIndex(size_t waiting_duration = 10) {
408-
bool keep_wating = true;
409-
while (keep_wating) {
410-
std::this_thread::sleep_for(std::chrono::milliseconds(waiting_duration));
411-
std::unique_lock<std::mutex> lock(queue_guard);
412-
if (jobQueue.empty()) {
413-
while (true) {
414-
if (executions_status.count() == 0) {
415-
keep_wating = false;
416-
break;
417-
}
418-
std::this_thread::sleep_for(std::chrono::milliseconds(waiting_duration));
419-
}
420-
}
421-
}
390+
mock_thread_pool.thread_pool_wait(waiting_duration);
422391
}
423392

424393
size_t getFlatIndexSize() { return getFlatBuffer()->indexLabelCount(); }
425394

426-
static size_t GetThreadsNum() { return THREAD_POOL_SIZE; }
395+
size_t getThreadsNum() { return mock_thread_pool.thread_pool_size; }
427396

428-
size_t getBufferLimit() { return flatBufferLimit; }
397+
size_t getBufferLimit() {
398+
return reinterpret_cast<VecSimTieredIndex<float, float> *>(this->index.get())
399+
->getFlatBufferLimit();
400+
}
429401
};
430402

431-
PyTieredIndex::~PyTieredIndex() { thread_pool_terminate(jobQueue, run_thread); }
432403
class PyTiered_HNSWIndex : public PyTieredIndex {
433404
public:
434405
explicit PyTiered_HNSWIndex(const HNSWParams &hnsw_params,
435-
const TieredHNSWParams &tiered_hnsw_params) {
406+
const TieredHNSWParams &tiered_hnsw_params, size_t buffer_limit) {
436407

437408
// Create primaryIndexParams and specific params for hnsw tiered index.
438409
VecSimParams primary_index_params = {.algo = VecSimAlgo_HNSWLIB,
439410
.algoParams = {.hnswParams = HNSWParams{hnsw_params}}};
440411

441-
// create TieredIndexParams
442-
TieredIndexParams tiered_params = TieredIndexParams_Init();
443-
412+
auto tiered_params = this->getTieredIndexParams(buffer_limit);
444413
tiered_params.primaryIndexParams = &primary_index_params;
445414
tiered_params.specificParams.tieredHnswParams = tiered_hnsw_params;
446415

447-
// create VecSimParams for TieredIndexParams
416+
// Create VecSimParams for TieredIndexParams
448417
VecSimParams params = {.algo = VecSimAlgo_TIERED,
449418
.algoParams = {.tieredParams = TieredIndexParams{tiered_params}}};
450419

451420
this->index = std::shared_ptr<VecSimIndex>(VecSimIndex_New(&params), VecSimIndex_Free);
421+
452422
// Set the created tiered index in the index external context.
453-
this->jobQueueCtx.index_strong_ref = this->index;
423+
this->mock_thread_pool.ctx->index_strong_ref = this->index;
454424
}
425+
455426
size_t HNSWLabelCount() {
456427
return this->index->info().tieredInfo.backendCommonInfo.indexLabelCount;
457428
}
@@ -568,17 +539,17 @@ PYBIND11_MODULE(VecSim, m) {
568539
py::arg("radius"), py::arg("query_param") = nullptr, py::arg("num_threads") = -1);
569540

570541
py::class_<PyTieredIndex, PyVecSimIndex>(m, "TieredIndex")
571-
.def("wait_for_index", &PyTiered_HNSWIndex::WaitForIndex, py::arg("waiting_duration") = 10)
572-
.def("get_curr_bf_size", &PyTiered_HNSWIndex::getFlatIndexSize)
573-
.def("get_buffer_limit", &PyTiered_HNSWIndex::getBufferLimit)
574-
.def_static("get_threads_num", &PyTieredIndex::GetThreadsNum);
542+
.def("wait_for_index", &PyTieredIndex::WaitForIndex, py::arg("waiting_duration") = 10)
543+
.def("get_curr_bf_size", &PyTieredIndex::getFlatIndexSize)
544+
.def("get_buffer_limit", &PyTieredIndex::getBufferLimit)
545+
.def("get_threads_num", &PyTieredIndex::getThreadsNum);
575546

576547
py::class_<PyTiered_HNSWIndex, PyTieredIndex>(m, "Tiered_HNSWIndex")
577-
.def(
578-
py::init([](const HNSWParams &hnsw_params, const TieredHNSWParams &tiered_hnsw_params) {
579-
return new PyTiered_HNSWIndex(hnsw_params, tiered_hnsw_params);
580-
}),
581-
py::arg("hnsw_params"), py::arg("tiered_hnsw_params"))
548+
.def(py::init([](const HNSWParams &hnsw_params, const TieredHNSWParams &tiered_hnsw_params,
549+
size_t flat_buffer_size = DEFAULT_BLOCK_SIZE) {
550+
return new PyTiered_HNSWIndex(hnsw_params, tiered_hnsw_params, flat_buffer_size);
551+
}),
552+
py::arg("hnsw_params"), py::arg("tiered_hnsw_params"), py::arg("flat_buffer_size"))
582553
.def("hnsw_label_count", &PyTiered_HNSWIndex::HNSWLabelCount);
583554

584555
py::class_<PyBFIndex, PyVecSimIndex>(m, "BFIndex")

src/python_bindings/tiered_index_mock.h

Lines changed: 0 additions & 129 deletions
This file was deleted.

tests/benchmark/bm_batch_iterator.h

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,19 +24,17 @@ class BM_BatchIterator : public BM_VecSimIndex<index_type_t> {
2424
static void HNSW_VariableBatchSize(benchmark::State &st);
2525
static void HNSW_BatchesToAdhocBF(benchmark::State &st);
2626

27-
static void RunBatchedSearch_HNSW(benchmark::State &st, size_t &correct, size_t iter,
27+
static void RunBatchedSearch_HNSW(benchmark::State &st, std::atomic_int &correct, size_t iter,
2828
size_t num_batches, size_t batch_size, size_t &total_res_num,
2929
size_t batch_increase_factor, size_t index_memory,
3030
double &memory_delta);
3131
};
3232

3333
template <typename index_type_t>
34-
void BM_BatchIterator<index_type_t>::RunBatchedSearch_HNSW(benchmark::State &st, size_t &correct,
35-
size_t iter, size_t num_batches,
36-
size_t batch_size, size_t &total_res_num,
37-
size_t batch_increase_factor,
38-
size_t index_memory,
39-
double &memory_delta) {
34+
void BM_BatchIterator<index_type_t>::RunBatchedSearch_HNSW(
35+
benchmark::State &st, std::atomic_int &correct, size_t iter, size_t num_batches,
36+
size_t batch_size, size_t &total_res_num, size_t batch_increase_factor, size_t index_memory,
37+
double &memory_delta) {
4038
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(
4139
INDICES.at(VecSimAlgo_HNSWLIB), QUERIES[iter % N_QUERIES].data(), nullptr);
4240
VecSimQueryResult_List accumulated_results[num_batches];
@@ -158,7 +156,7 @@ void BM_BatchIterator<index_type_t>::HNSW_FixedBatchSize(benchmark::State &st) {
158156
size_t num_batches = st.range(1);
159157
size_t total_res_num = num_batches * batch_size;
160158
size_t iter = 0;
161-
size_t correct = 0;
159+
std::atomic_int correct = 0;
162160
size_t index_memory = VecSimIndex_Info(INDICES[VecSimAlgo_HNSWLIB]).commonInfo.memory;
163161
double memory_delta = 0.0;
164162

@@ -177,7 +175,7 @@ void BM_BatchIterator<index_type_t>::HNSW_VariableBatchSize(benchmark::State &st
177175
size_t num_batches = st.range(1);
178176
size_t total_res_num;
179177
size_t iter = 0;
180-
size_t correct = 0;
178+
std::atomic_int correct = 0;
181179
size_t index_memory = VecSimIndex_Info(INDICES[VecSimAlgo_HNSWLIB]).commonInfo.memory;
182180
double memory_delta = 0.0;
183181

@@ -196,7 +194,7 @@ void BM_BatchIterator<index_type_t>::HNSW_BatchesToAdhocBF(benchmark::State &st)
196194
size_t num_batches = st.range(1);
197195
size_t total_res_num;
198196
size_t iter = 0;
199-
size_t correct = 0;
197+
std::atomic_int correct = 0;
200198
size_t index_memory = VecSimIndex_Info(INDICES[VecSimAlgo_HNSWLIB]).commonInfo.memory;
201199
double memory_delta = 0.0;
202200

0 commit comments

Comments
 (0)