Skip to content

Commit 94f2fcd

Browse files
authored
Reduce locks for adhoc BF (#419) (#420)
1 parent a6609c3 commit 94f2fcd

16 files changed

+154
-128
lines changed

src/VecSim/algorithms/brute_force/brute_force_multi.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class BruteForceIndex_Multi : public BruteForceIndex<DataType, DistType> {
2626
int addVector(const void *vector_data, labelType label, void *auxiliaryCtx = nullptr) override;
2727
int deleteVector(labelType labelType) override;
2828
int deleteVectorById(labelType label, idType id) override;
29-
double getDistanceFrom(labelType label, const void *vector_data) const override;
29+
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override;
3030
inline size_t indexLabelCount() const override { return this->labelToIdsLookup.size(); }
3131

3232
inline std::unique_ptr<vecsim_stl::abstract_results_container>
@@ -192,7 +192,8 @@ int BruteForceIndex_Multi<DataType, DistType>::deleteVectorById(labelType label,
192192
}
193193

194194
template <typename DataType, typename DistType>
195-
double BruteForceIndex_Multi<DataType, DistType>::getDistanceFrom(labelType label,
195+
double
196+
BruteForceIndex_Multi<DataType, DistType>::getDistanceFrom_Unsafe(labelType label,
196197
const void *vector_data) const {
197198

198199
auto IDs = this->labelToIdsLookup.find(label);

src/VecSim/algorithms/brute_force/brute_force_single.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class BruteForceIndex_Single : public BruteForceIndex<DataType, DistType> {
2424
int addVector(const void *vector_data, labelType label, void *auxiliaryCtx = nullptr) override;
2525
int deleteVector(labelType label) override;
2626
int deleteVectorById(labelType label, idType id) override;
27-
double getDistanceFrom(labelType label, const void *vector_data) const override;
27+
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override;
2828

2929
inline std::unique_ptr<vecsim_stl::abstract_results_container>
3030
getNewResultsContainer(size_t cap) const override {
@@ -184,7 +184,8 @@ int BruteForceIndex_Single<DataType, DistType>::deleteVectorById(labelType label
184184
}
185185

186186
template <typename DataType, typename DistType>
187-
double BruteForceIndex_Single<DataType, DistType>::getDistanceFrom(labelType label,
187+
double
188+
BruteForceIndex_Single<DataType, DistType>::getDistanceFrom_Unsafe(labelType label,
188189
const void *vector_data) const {
189190

190191
auto optionalId = this->labelToIdLookup.find(label);

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
311311
inline auto safeGetEntryPointState() const;
312312
inline void lockIndexDataGuard() const;
313313
inline void unlockIndexDataGuard() const;
314+
inline void lockSharedIndexDataGuard() const;
315+
inline void unlockSharedIndexDataGuard() const;
314316
inline void lockNodeLinks(idType node_id) const;
315317
inline void unlockNodeLinks(idType node_id) const;
316318
inline void lockNodeLinks(ElementGraphData *node_data) const;
@@ -351,7 +353,6 @@ class HNSWIndex : public VecSimIndexAbstract<DistType>,
351353

352354
// Inline priority queue getter that need to be implemented by derived class.
353355
virtual inline candidatesLabelsMaxHeap<DistType> *getNewMaxPriorityQueue() const = 0;
354-
virtual double safeGetDistanceFrom(labelType label, const void *vector_data) const = 0;
355356

356357
#ifdef BUILD_TESTS
357358
/**
@@ -520,6 +521,16 @@ void HNSWIndex<DataType, DistType>::unlockIndexDataGuard() const {
520521
indexDataGuard.unlock();
521522
}
522523

524+
template <typename DataType, typename DistType>
525+
void HNSWIndex<DataType, DistType>::lockSharedIndexDataGuard() const {
526+
indexDataGuard.lock_shared();
527+
}
528+
529+
template <typename DataType, typename DistType>
530+
void HNSWIndex<DataType, DistType>::unlockSharedIndexDataGuard() const {
531+
indexDataGuard.unlock_shared();
532+
}
533+
523534
template <typename DataType, typename DistType>
524535
void HNSWIndex<DataType, DistType>::lockNodeLinks(ElementGraphData *node_data) const {
525536
node_data->neighborsGuard.lock();

src/VecSim/algorithms/hnsw/hnsw_multi.h

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
4242
return keys;
4343
};
4444

45-
template <bool Safe>
4645
inline double getDistanceFromInternal(labelType label, const void *vector_data) const;
4746

4847
public:
@@ -91,11 +90,8 @@ class HNSWIndex_Multi : public HNSWIndex<DataType, DistType> {
9190
inline std::vector<idType> markDelete(labelType label) override;
9291
inline bool safeCheckIfLabelExistsInIndex(labelType label,
9392
bool also_done_processing) const override;
94-
double getDistanceFrom(labelType label, const void *vector_data) const override {
95-
return getDistanceFromInternal<false>(label, vector_data);
96-
}
97-
double safeGetDistanceFrom(labelType label, const void *vector_data) const override {
98-
return getDistanceFromInternal<true>(label, vector_data);
93+
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override {
94+
return getDistanceFromInternal(label, vector_data);
9995
}
10096
};
10197

@@ -112,38 +108,20 @@ size_t HNSWIndex_Multi<DataType, DistType>::indexLabelCount() const {
112108
* helper functions
113109
*/
114110

115-
// Depending on the value of the Safe template parameter, this function will either return a copy
116-
// of the argument or a reference to it.
117-
template <bool Safe, typename Arg>
118-
constexpr decltype(auto) getCopyOrReference(Arg &&arg) {
119-
if constexpr (Safe) {
120-
return std::decay_t<Arg>(arg);
121-
} else {
122-
return (arg);
123-
}
124-
}
125-
126111
template <typename DataType, typename DistType>
127-
template <bool Safe>
128112
double HNSWIndex_Multi<DataType, DistType>::getDistanceFromInternal(labelType label,
129113
const void *vector_data) const {
130114
DistType dist = INVALID_SCORE;
131115

132116
// Check if the label exists in the index, return invalid score if not.
133-
if (Safe)
134-
this->indexDataGuard.lock_shared();
135117
auto it = this->labelLookup.find(label);
136118
if (it == this->labelLookup.end()) {
137-
if (Safe)
138-
this->indexDataGuard.unlock_shared();
139119
return dist;
140120
}
141121

142122
// Get the vector of ids associated with the label.
143123
// Get a copy if `Safe` is true, otherwise get a reference.
144-
decltype(auto) IDs = getCopyOrReference<Safe>(it->second);
145-
if (Safe)
146-
this->indexDataGuard.unlock_shared();
124+
auto &IDs = it->second;
147125

148126
// Iterate over the ids and find the minimum distance.
149127
for (auto id : IDs) {

src/VecSim/algorithms/hnsw/hnsw_single.h

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
2525
inline void resizeLabelLookup(size_t new_max_elements) override;
2626
inline vecsim_stl::set<labelType> getLabelsSet() const override;
2727

28-
template <bool Safe>
2928
inline double getDistanceFromInternal(labelType label, const void *vector_data) const;
3029

3130
public:
@@ -73,11 +72,8 @@ class HNSWIndex_Single : public HNSWIndex<DataType, DistType> {
7372
inline bool safeCheckIfLabelExistsInIndex(labelType label,
7473
bool also_done_processing = false) const override;
7574

76-
double getDistanceFrom(labelType label, const void *vector_data) const override {
77-
return getDistanceFromInternal<false>(label, vector_data);
78-
}
79-
double safeGetDistanceFrom(labelType label, const void *vector_data) const override {
80-
return getDistanceFromInternal<true>(label, vector_data);
75+
double getDistanceFrom_Unsafe(labelType label, const void *vector_data) const override {
76+
return getDistanceFromInternal(label, vector_data);
8177
}
8278
};
8379

@@ -106,22 +102,15 @@ inline vecsim_stl::set<labelType> HNSWIndex_Single<DataType, DistType>::getLabel
106102
};
107103

108104
template <typename DataType, typename DistType>
109-
template <bool Safe>
110105
double
111106
HNSWIndex_Single<DataType, DistType>::getDistanceFromInternal(labelType label,
112107
const void *vector_data) const {
113-
if (Safe)
114-
this->indexDataGuard.lock_shared();
115108

116109
auto it = labelLookup.find(label);
117110
if (it == labelLookup.end()) {
118-
if (Safe)
119-
this->indexDataGuard.unlock_shared();
120111
return INVALID_SCORE;
121112
}
122113
idType id = it->second;
123-
if (Safe)
124-
this->indexDataGuard.unlock_shared();
125114

126115
return this->distFunc(vector_data, this->getDataByInternalId(id), this->dim);
127116
}

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
187187
size_t indexSize() const override;
188188
size_t indexLabelCount() const override;
189189
size_t indexCapacity() const override;
190-
double getDistanceFrom(labelType label, const void *blob) const override;
190+
double getDistanceFrom_Unsafe(labelType label, const void *blob) const override;
191191
// Do nothing here, each tier (flat buffer and HNSW) should increase capacity for itself when
192192
// needed.
193193
VecSimIndexInfo info() const override;
@@ -210,6 +210,17 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
210210
"running asynchronous GC for tiered HNSW index");
211211
this->executeReadySwapJobs(this->pendingSwapJobsThreshold);
212212
}
213+
void acquireSharedLocks() override {
214+
this->flatIndexGuard.lock_shared();
215+
this->mainIndexGuard.lock_shared();
216+
this->getHNSWIndex()->lockSharedIndexDataGuard();
217+
}
218+
219+
void releaseSharedLocks() override {
220+
this->flatIndexGuard.unlock_shared();
221+
this->mainIndexGuard.unlock_shared();
222+
this->getHNSWIndex()->unlockSharedIndexDataGuard();
223+
}
213224
#ifdef BUILD_TESTS
214225
void getDataByLabel(labelType label, std::vector<std::vector<DataType>> &vectors_output) const;
215226
#endif
@@ -621,9 +632,9 @@ TieredHNSWIndex<DataType, DistType>::~TieredHNSWIndex() {
621632
template <typename DataType, typename DistType>
622633
size_t TieredHNSWIndex<DataType, DistType>::indexSize() const {
623634
this->flatIndexGuard.lock_shared();
624-
this->getHNSWIndex()->lockIndexDataGuard();
635+
this->getHNSWIndex()->lockSharedIndexDataGuard();
625636
size_t res = this->backendIndex->indexSize() + this->frontendIndex->indexSize();
626-
this->getHNSWIndex()->unlockIndexDataGuard();
637+
this->getHNSWIndex()->unlockSharedIndexDataGuard();
627638
this->flatIndexGuard.unlock_shared();
628639
return res;
629640
}
@@ -803,14 +814,18 @@ int TieredHNSWIndex<DataType, DistType>::deleteVector(labelType label) {
803814
// 3. label exists in both indexes - we may have some of the vectors with the same label in the flat
804815
// buffer only and some in the Main index only (and maybe temporal duplications).
805816
// So, we get the distance from both indexes and return the minimum.
817+
818+
// IMPORTANT: this should be called when the *tiered index locks are locked for shared ownership*,
819+
// along with HNSW index data guard lock. That is since the internal getDistanceFrom calls access
820+
// the indexes' data, and it is not safe to run insert/delete operation in parallel. Also, we avoid
821+
// acquiring the locks internally, since this is usually called for every vector individually, and
822+
// the overhead of acquiring and releasing the locks is significant in that case.
806823
template <typename DataType, typename DistType>
807-
double TieredHNSWIndex<DataType, DistType>::getDistanceFrom(labelType label,
808-
const void *blob) const {
824+
double TieredHNSWIndex<DataType, DistType>::getDistanceFrom_Unsafe(labelType label,
825+
const void *blob) const {
809826
// Try to get the distance from the flat buffer.
810827
// If the label doesn't exist, the distance will be NaN.
811-
this->flatIndexGuard.lock_shared();
812-
auto flat_dist = this->frontendIndex->getDistanceFrom(label, blob);
813-
this->flatIndexGuard.unlock_shared();
828+
auto flat_dist = this->frontendIndex->getDistanceFrom_Unsafe(label, blob);
814829

815830
// Optimization. TODO: consider having different implementations for single and multi indexes,
816831
// to avoid checking the index type on every query.
@@ -821,9 +836,7 @@ double TieredHNSWIndex<DataType, DistType>::getDistanceFrom(labelType label,
821836
}
822837

823838
// Try to get the distance from the Main index.
824-
this->mainIndexGuard.lock_shared();
825-
auto hnsw_dist = getHNSWIndex()->safeGetDistanceFrom(label, blob);
826-
this->mainIndexGuard.unlock_shared();
839+
auto hnsw_dist = getHNSWIndex()->getDistanceFrom_Unsafe(label, blob);
827840

828841
// Return the minimum distance that is not NaN.
829842
return std::fmin(flat_dist, hnsw_dist);

src/VecSim/vec_sim.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,9 @@ extern "C" int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label) {
118118
return index->deleteVector(label);
119119
}
120120

121-
extern "C" double VecSimIndex_GetDistanceFrom(VecSimIndex *index, size_t label, const void *blob) {
122-
return index->getDistanceFrom(label, blob);
121+
extern "C" double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label,
122+
const void *blob) {
123+
return index->getDistanceFrom_Unsafe(label, blob);
123124
}
124125

125126
extern "C" size_t VecSimIndex_EstimateElementSize(const VecSimParams *params) {
@@ -241,6 +242,14 @@ extern "C" void VecSimTieredIndex_GC(VecSimIndex *index) {
241242
}
242243
}
243244

245+
extern "C" void VecSimTieredIndex_AcquireSharedLocks(VecSimIndex *index) {
246+
index->acquireSharedLocks();
247+
}
248+
249+
extern "C" void VecSimTieredIndex_ReleaseSharedLocks(VecSimIndex *index) {
250+
index->releaseSharedLocks();
251+
}
252+
244253
extern "C" void VecSim_SetMemoryFunctions(VecSimMemoryFunctions memoryfunctions) {
245254
VecSimAllocator::setMemoryFunctions(memoryfunctions);
246255
}

src/VecSim/vec_sim.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label);
7474
* @brief Calculate the distance of a vector from an index to a vector. This function assumes that
7575
* the vector fits the index - its type and dimension are the same as the index's, and if the
7676
* index's distance metric is cosine, the vector is already normalized.
77+
* IMPORTANT: for tiered index, this should be called while *locks are locked for shared ownership*,
78+
* as we avoid acquiring the locks internally. That is since this is usually called for every vector
79+
* individually, and the overhead of acquiring and releasing the locks is significant in that case.
7780
* @param index the index from which the first vector is located, and that defines the distance
7881
* metric.
7982
* @param label the label of the vector in the index.
@@ -82,7 +85,7 @@ int VecSimIndex_DeleteVector(VecSimIndex *index, size_t label);
8285
* @return The distance (according to the index's distance metric) between `blob` and the vector
8386
* with label label`.
8487
*/
85-
double VecSimIndex_GetDistanceFrom(VecSimIndex *index, size_t label, const void *blob);
88+
double VecSimIndex_GetDistanceFrom_Unsafe(VecSimIndex *index, size_t label, const void *blob);
8689

8790
/**
8891
* @brief normalize the vector blob in place.
@@ -199,6 +202,15 @@ void VecSimTieredIndex_GC(VecSimIndex *index);
199202
bool VecSimIndex_PreferAdHocSearch(VecSimIndex *index, size_t subsetSize, size_t k,
200203
bool initial_check);
201204

205+
/**
206+
* @brief Acquire/Release the required locks of the tiered index externally before executing an
207+
* an unsafe *READ* operation (as the locks are acquired for shared ownership).
208+
* @param index the tiered index to protect (no nothing for non-tiered indexes).
209+
*/
210+
void VecSimTieredIndex_AcquireSharedLocks(VecSimIndex *index);
211+
212+
void VecSimTieredIndex_ReleaseSharedLocks(VecSimIndex *index);
213+
202214
/**
203215
* @brief Allow 3rd party memory functions to be used for memory management.
204216
*

src/VecSim/vec_sim_index.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,5 +236,7 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
236236
return this->newBatchIterator(processed_blob, queryParams);
237237
}
238238

239-
void runGC() override {} // Do nothing, relevant for tiered index only.
239+
void runGC() override {} // Do nothing, relevant for tiered index only.
240+
void acquireSharedLocks() override {} // Do nothing, relevant for tiered index only.
241+
void releaseSharedLocks() override {} // Do nothing, relevant for tiered index only.
240242
};

src/VecSim/vec_sim_interface.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ struct VecSimIndexInterface : public VecsimBaseObject {
7878
* @return The distance (according to the index's distance metric) between `blob` and the vector
7979
* with id `id`.
8080
*/
81-
virtual double getDistanceFrom(labelType id, const void *blob) const = 0;
81+
virtual double getDistanceFrom_Unsafe(labelType id, const void *blob) const = 0;
8282

8383
/**
8484
* @brief Return the number of vectors in the index (including ones that are marked as deleted).
@@ -220,6 +220,16 @@ struct VecSimIndexInterface : public VecsimBaseObject {
220220
*/
221221
virtual void runGC() = 0;
222222

223+
/**
224+
* @brief Acquire the locks for shared ownership in tiered async index.
225+
*/
226+
virtual void acquireSharedLocks() = 0;
227+
228+
/**
229+
* @brief Release the locks for shared ownership in tiered async index.
230+
*/
231+
virtual void releaseSharedLocks() = 0;
232+
223233
/**
224234
* @brief Allow 3rd party timeout callback to be used for limiting runtime of a query.
225235
*

0 commit comments

Comments
 (0)