From 7a6c10aef36247323ff3301a708d9b81a1abc5dc Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 8 May 2025 04:22:44 +0000 Subject: [PATCH 01/22] make CreatePreprocessorsContainerParams templated and move it to header file add logic to determine the cosine processed_bytes_count add processed_bytes_count to the cosine component change blob_size param to the original blob size instead of processed_bytes_count in all preprocessing functions. --- src/VecSim/CMakeLists.txt | 1 - .../components/components_factory.cpp | 23 -------- .../components/components_factory.h | 48 ++++++++++++++++- .../components/preprocessors_factory.h | 5 +- .../computer/preprocessor_container.cpp | 34 +++++++----- .../spaces/computer/preprocessor_container.h | 52 +++++++++---------- src/VecSim/spaces/computer/preprocessors.h | 47 +++++++++-------- tests/unit/test_components.cpp | 9 +++- 8 files changed, 126 insertions(+), 93 deletions(-) delete mode 100644 src/VecSim/index_factories/components/components_factory.cpp diff --git a/src/VecSim/CMakeLists.txt b/src/VecSim/CMakeLists.txt index 557851018..ab64066c9 100644 --- a/src/VecSim/CMakeLists.txt +++ b/src/VecSim/CMakeLists.txt @@ -31,7 +31,6 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE} index_factories/tiered_factory.cpp index_factories/svs_factory.cpp index_factories/index_factory.cpp - index_factories/components/components_factory.cpp algorithms/hnsw/visited_nodes_handler.cpp vec_sim.cpp vec_sim_debug.cpp diff --git a/src/VecSim/index_factories/components/components_factory.cpp b/src/VecSim/index_factories/components/components_factory.cpp deleted file mode 100644 index caf485005..000000000 --- a/src/VecSim/index_factories/components/components_factory.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ -#include "VecSim/index_factories/components/components_factory.h" - -PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric metric, size_t dim, - bool is_normalized, - unsigned char alignment) { - // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors - // and query blobs. - VecSimMetric pp_metric; - if (is_normalized && metric == VecSimMetric_Cosine) { - pp_metric = VecSimMetric_IP; - } else { - pp_metric = metric; - } - return {.metric = pp_metric, .dim = dim, .alignment = alignment}; -} diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h index 1135d85cc..23d4d93c7 100644 --- a/src/VecSim/index_factories/components/components_factory.h +++ b/src/VecSim/index_factories/components/components_factory.h @@ -14,9 +14,53 @@ #include "VecSim/index_factories/components/preprocessors_factory.h" #include "VecSim/spaces/computer/calculator.h" +/** + * @brief Creates parameters for a preprocessors container based on the given metric, dimension, + * normalization flag, and alignment. + * + * @tparam DataType The data type of the vector elements (e.g., float, int). + * @param metric The similarity metric to be used (e.g., Cosine, Inner Product). + * @param dim The dimensionality of the vectors. + * @param is_normalized A flag indicating whether the vectors are already normalized. + * @param alignment The alignment requirement for the data. + * @return A PreprocessorsContainerParams object containing the processed parameters: + * - metric: The adjusted metric based on the input and normalization flag. + * - dim: The dimensionality of the vectors. + * - alignment: The alignment requirement for the data. + * - processed_bytes_count: The size of the processed data blob in bytes. + * + * @details + * If the metric is Cosine and the data type is integral, the processed bytes count may include + * additional space for normalization (currently commented out). If the vectors are already + * normalized (is_normalized == true), the metric is adjusted to Inner Product (IP) to skip + * redundant normalization during preprocessing. + */ +template PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric metric, size_t dim, bool is_normalized, - unsigned char alignment); + unsigned char alignment) { + // By default the processed blob size is the same as the original blob size. + size_t processed_bytes_count = dim * sizeof(DataType); + + // If the index metric is Cosine, and + VecSimMetric pp_metric = metric; + if (metric == VecSimMetric_Cosine) { + // if metric is cosine and DataType is integral, the processed_bytes_count includes the + // norm appended to the vector. + if (std::is_integral::value) { + processed_bytes_count += sizeof(float); + } + // if is_normalized == true, we will enforce skipping normalizing vector and query blobs by + // setting the metric to IP. + if (is_normalized) { + pp_metric = VecSimMetric_IP; + } + } + return {.metric = pp_metric, + .dim = dim, + .alignment = alignment, + .processed_bytes_count = processed_bytes_count}; +} template IndexComponents @@ -29,7 +73,7 @@ CreateIndexComponents(std::shared_ptr allocator, VecSimMetric m auto indexCalculator = new (allocator) DistanceCalculatorCommon(allocator, distFunc); PreprocessorsContainerParams ppParams = - CreatePreprocessorsContainerParams(metric, dim, is_normalized, alignment); + CreatePreprocessorsContainerParams(metric, dim, is_normalized, alignment); auto preprocessors = CreatePreprocessorsContainer(allocator, ppParams); return {indexCalculator, preprocessors}; diff --git a/src/VecSim/index_factories/components/preprocessors_factory.h b/src/VecSim/index_factories/components/preprocessors_factory.h index ba8d2a286..c11e935d8 100644 --- a/src/VecSim/index_factories/components/preprocessors_factory.h +++ b/src/VecSim/index_factories/components/preprocessors_factory.h @@ -15,6 +15,7 @@ struct PreprocessorsContainerParams { VecSimMetric metric; size_t dim; unsigned char alignment; + size_t processed_bytes_count; }; template @@ -25,8 +26,8 @@ CreatePreprocessorsContainer(std::shared_ptr allocator, if (params.metric == VecSimMetric_Cosine) { auto multiPPContainer = new (allocator) MultiPreprocessorsContainer(allocator, params.alignment); - auto cosine_preprocessor = - new (allocator) CosinePreprocessor(allocator, params.dim); + auto cosine_preprocessor = new (allocator) + CosinePreprocessor(allocator, params.dim, params.processed_bytes_count); int next_valid_pp_index = multiPPContainer->addPreprocessor(cosine_preprocessor); UNUSED(next_valid_pp_index); assert(next_valid_pp_index != -1 && "Cosine preprocessor was not added correctly"); diff --git a/src/VecSim/spaces/computer/preprocessor_container.cpp b/src/VecSim/spaces/computer/preprocessor_container.cpp index 8b61e1107..9328a7c85 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.cpp +++ b/src/VecSim/spaces/computer/preprocessor_container.cpp @@ -8,38 +8,46 @@ */ #include "VecSim/spaces/computer/preprocessor_container.h" +/** + * +=========================== TODO ================= +original_blob_size should be a reference so it can be changed when changes in the pp chain. + + +*/ ProcessedBlobs PreprocessorsContainerAbstract::preprocess(const void *original_blob, - size_t processed_bytes_count) const { - return ProcessedBlobs(preprocessForStorage(original_blob, processed_bytes_count), - preprocessQuery(original_blob, processed_bytes_count)); + size_t original_blob_size) const { + return ProcessedBlobs(preprocessForStorage(original_blob, original_blob_size), + preprocessQuery(original_blob, original_blob_size)); } MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob, - size_t processed_bytes_count) const { + size_t original_blob_size) const { return wrapWithDummyDeleter(const_cast(original_blob)); } -MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery( - const void *original_blob, size_t processed_bytes_count, bool force_copy) const { - return maybeCopyToAlignedMem(original_blob, processed_bytes_count, force_copy); +MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob, + size_t original_blob_size, + bool force_copy) const { + return maybeCopyToAlignedMem(original_blob, original_blob_size, force_copy); } void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob, - size_t processed_bytes_count) const {} + size_t input_blob_bytes_count) const {} void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob, - size_t processed_bytes_count) const {} + size_t input_blob_bytes_count) const { +} MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem( - const void *original_blob, size_t blob_bytes_count, bool force_copy) const { + const void *original_blob, size_t original_blob_size, bool force_copy) const { bool needs_copy = force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0)); if (needs_copy) { - auto aligned_mem = this->allocator->allocate_aligned(blob_bytes_count, this->alignment); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(aligned_mem, original_blob, blob_bytes_count); + auto aligned_mem = this->allocator->allocate_aligned(original_blob_size, this->alignment); + memcpy(aligned_mem, original_blob, original_blob_size); return this->wrapAllocated(aligned_mem); } diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index 5d80a321f..70591bd4b 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -22,19 +22,17 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject { PreprocessorsContainerAbstract(std::shared_ptr allocator, unsigned char alignment) : VecsimBaseObject(allocator), alignment(alignment) {} - virtual ProcessedBlobs preprocess(const void *original_blob, - size_t processed_bytes_count) const; + virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const; virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob, - size_t processed_bytes_count) const; + size_t original_blob_size) const; virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob, - size_t processed_bytes_count, + size_t original_blob_size, bool force_copy = false) const; - virtual void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const; - - virtual void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const; + virtual void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const; + virtual void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const; unsigned char getAlignment() const { return alignment; } @@ -43,7 +41,7 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject { // Allocate and copy the blob only if the original blob is not aligned. MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob, - size_t blob_bytes_count, + size_t original_blob_size, bool force_copy = false) const; MemoryUtils::unique_blob wrapAllocated(void *blob) const { @@ -82,19 +80,17 @@ class MultiPreprocessorsContainer : public PreprocessorsContainerAbstract { */ int addPreprocessor(PreprocessorInterface *preprocessor); - ProcessedBlobs preprocess(const void *original_blob, - size_t processed_bytes_count) const override; + ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const override; MemoryUtils::unique_blob preprocessForStorage(const void *original_blob, - size_t processed_bytes_count) const override; + size_t original_blob_size) const override; - MemoryUtils::unique_blob preprocessQuery(const void *original_blob, - size_t processed_bytes_count, + MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t original_blob_size, bool force_copy = false) const override; - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const override; + void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const override; - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override; + void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override; #ifdef BUILD_TESTS std::array getPreprocessors() const { @@ -160,11 +156,11 @@ int MultiPreprocessorsContainer::addPreprocessor( template ProcessedBlobs MultiPreprocessorsContainer::preprocess( - const void *original_blob, size_t processed_bytes_count) const { + const void *original_blob, size_t original_blob_size) const { // No preprocessors were added yet. if (preprocessors[0] == nullptr) { // query might need to be aligned - auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count); + auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size); return ProcessedBlobs( std::move(Base::wrapWithDummyDeleter(const_cast(original_blob))), std::move(query_ptr)); @@ -175,7 +171,7 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces for (auto pp : preprocessors) { if (!pp) break; - pp->preprocess(original_blob, storage_blob, query_blob, processed_bytes_count, + pp->preprocess(original_blob, storage_blob, query_blob, original_blob_size, this->alignment); } // At least one blob was allocated. @@ -194,7 +190,7 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces if (query_blob == nullptr) { // we processed only the storage // query might need to be aligned - auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count); + auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size); return ProcessedBlobs(std::move(this->wrapAllocated(storage_blob)), std::move(query_ptr)); } @@ -206,13 +202,13 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces template MemoryUtils::unique_blob MultiPreprocessorsContainer::preprocessForStorage( - const void *original_blob, size_t processed_bytes_count) const { + const void *original_blob, size_t original_blob_size) const { void *storage_blob = nullptr; for (auto pp : preprocessors) { if (!pp) break; - pp->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); + pp->preprocessForStorage(original_blob, storage_blob, original_blob_size); } return storage_blob ? std::move(this->wrapAllocated(storage_blob)) @@ -221,40 +217,40 @@ MultiPreprocessorsContainer::preprocessForStorage( template MemoryUtils::unique_blob MultiPreprocessorsContainer::preprocessQuery( - const void *original_blob, size_t processed_bytes_count, bool force_copy) const { + const void *original_blob, size_t original_blob_size, bool force_copy) const { void *query_blob = nullptr; for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessQuery(original_blob, query_blob, processed_bytes_count, this->alignment); + pp->preprocessQuery(original_blob, query_blob, original_blob_size, this->alignment); } return query_blob ? std::move(this->wrapAllocated(query_blob)) - : std::move(this->maybeCopyToAlignedMem(original_blob, processed_bytes_count, + : std::move(this->maybeCopyToAlignedMem(original_blob, original_blob_size, force_copy)); } template void MultiPreprocessorsContainer::preprocessQueryInPlace( - void *blob, size_t processed_bytes_count) const { + void *blob, size_t input_blob_bytes_count) const { for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessQueryInPlace(blob, processed_bytes_count, this->alignment); + pp->preprocessQueryInPlace(blob, input_blob_bytes_count, this->alignment); } } template void MultiPreprocessorsContainer::preprocessStorageInPlace( - void *blob, size_t processed_bytes_count) const { + void *blob, size_t input_blob_bytes_count) const { for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessStorageInPlace(blob, processed_bytes_count); + pp->preprocessStorageInPlace(blob, input_blob_bytes_count); } } diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index db2de5ad0..d97cf57ca 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -29,29 +29,32 @@ class PreprocessorInterface : public VecsimBaseObject { // down the preprocessors pipeline (such as in quantization preprocessor that compresses the // vector). virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const = 0; + size_t original_blob_size, unsigned char alignment) const = 0; virtual void preprocessForStorage(const void *original_blob, void *&storage_blob, - size_t processed_bytes_count) const = 0; + size_t original_blob_size) const = 0; virtual void preprocessQuery(const void *original_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const = 0; - virtual void preprocessQueryInPlace(void *original_blob, size_t processed_bytes_count, + size_t original_blob_size, unsigned char alignment) const = 0; + virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_bytes_count, unsigned char alignment) const = 0; virtual void preprocessStorageInPlace(void *original_blob, - size_t processed_bytes_count) const = 0; + size_t input_blob_bytes_count) const = 0; }; template class CosinePreprocessor : public PreprocessorInterface { public: - CosinePreprocessor(std::shared_ptr allocator, size_t dim) + // This preprocessor assumes storage_blob and query_blob + // both are preprocessed in the same way, and yield a blob in the same size. + CosinePreprocessor(std::shared_ptr allocator, size_t dim, + size_t processed_bytes_count) : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc()), - dim(dim) {} + dim(dim), processed_bytes_count(processed_bytes_count) {} // If a blob (storage_blob or query_blob) is not nullptr, it means a previous preprocessor // already allocated and processed it. So, we process it inplace. If it's null, we need to // allocate memory for it and copy the original_blob to it. void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { + size_t original_blob_size, unsigned char alignment) const override { // Case 1: Blobs are different (one might be null, or both are allocated and processed // separately). @@ -59,12 +62,10 @@ class CosinePreprocessor : public PreprocessorInterface { // If one of them is null, allocate memory for it and copy the original_blob to it. if (storage_blob == nullptr) { storage_blob = this->allocator->allocate(processed_bytes_count); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(storage_blob, original_blob, processed_bytes_count); + memcpy(storage_blob, original_blob, original_blob_size); } else if (query_blob == nullptr) { query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(query_blob, original_blob, processed_bytes_count); + memcpy(query_blob, original_blob, original_blob_size); } // Normalize both blobs. @@ -74,8 +75,7 @@ class CosinePreprocessor : public PreprocessorInterface { if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set // storage_blob to point to it. query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(query_blob, original_blob, processed_bytes_count); + memcpy(query_blob, original_blob, original_blob_size); storage_blob = query_blob; } // normalize one of them (since they point to the same memory). @@ -84,37 +84,40 @@ class CosinePreprocessor : public PreprocessorInterface { } void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { + size_t original_blob_size) const override { if (blob == nullptr) { blob = this->allocator->allocate(processed_bytes_count); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(blob, original_blob, processed_bytes_count); + memcpy(blob, original_blob, original_blob_size); } normalize_func(blob, this->dim); } - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, + void preprocessQuery(const void *original_blob, void *&blob, size_t original_blob_size, unsigned char alignment) const override { if (blob == nullptr) { blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - // TODO: handle original_blob_size != processed_bytes_count - memcpy(blob, original_blob, processed_bytes_count); + memcpy(blob, original_blob, original_blob_size); } normalize_func(blob, this->dim); } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count, unsigned char alignment) const override { assert(blob); + // TODO: replace with a debug assert and add values of input_blob_bytes_count and + // processed_bytes_count + assert(input_blob_bytes_count == this->processed_bytes_count); normalize_func(blob, this->dim); } - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override { + void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override { assert(blob); + assert(input_blob_bytes_count == this->processed_bytes_count); normalize_func(blob, this->dim); } private: spaces::normalizeVector_f normalize_func; const size_t dim; + const size_t processed_bytes_count; }; diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 95679a906..5c0f3c0de 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -471,12 +471,15 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { float value_to_add_storage = 7.0f; float value_to_add_query = 2.0f; const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; + // TODo: change this test so that original_blob_size != processed_bytes_count + constexpr size_t processed_bytes_count = sizeof(original_blob); auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); // adding cosine preprocessor - auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); + auto cosine_preprocessor = + new (allocator) CosinePreprocessor(allocator, dim, processed_bytes_count); multiPPContainer.addPreprocessor(cosine_preprocessor); { ProcessedBlobs processed_blobs = @@ -536,6 +539,7 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { float value_to_add_storage = 7.0f; float value_to_add_query = 2.0f; const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; + constexpr size_t processed_bytes_count = sizeof(original_blob); // Creating multi preprocessors container auto mixed_preprocessor = new (allocator) @@ -568,7 +572,8 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { } // adding cosine preprocessor - auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, dim); + auto cosine_preprocessor = + new (allocator) CosinePreprocessor(allocator, dim, processed_bytes_count); multiPPContainer.addPreprocessor(cosine_preprocessor); { ProcessedBlobs processed_blobs = From cc4281a76b906b54f99c90d7bd2a417ec68d8f07 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 8 May 2025 13:43:23 +0000 Subject: [PATCH 02/22] plan for the tests --- tests/unit/test_components.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 5c0f3c0de..57b7e578e 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -602,3 +602,20 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { } // The preprocessors should be released by the preprocessors container. } + +TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { + // add cosine pp that changes the original blob size + // add a pp that preprocesses the normalized blob (same size) + // add a pp that changes the storage_blob size, but not changing the query_blob size +} + + +TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { + // add a pp that changes the storage_blob size, but not changing the query_blob size + // add a pp that preprocesses the normalized blob (same size) + // add cosine pp that changes the original blob size +} + +TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { + // pp multi container where cosine is only needed for the query blob (not supported yet) +} From 86a44a967b9f46b72bafd6f3151ec06cf43bb569 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 12 May 2025 10:59:40 +0000 Subject: [PATCH 03/22] rename original_blob_size-> input_blob_size add DummyChangeAllocSizePreprocessor class to test pp that changes the size add tests one will fail because the input blob size is not updated --- .../spaces/computer/preprocessor_container.h | 2 + src/VecSim/spaces/computer/preprocessors.h | 37 ++- tests/unit/test_components.cpp | 280 +++++++++++++++++- 3 files changed, 284 insertions(+), 35 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index 70591bd4b..80453ce19 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -22,11 +22,13 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject { PreprocessorsContainerAbstract(std::shared_ptr allocator, unsigned char alignment) : VecsimBaseObject(allocator), alignment(alignment) {} + // It is assumed that the resulted query blob is aligned. virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const; virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob, size_t original_blob_size) const; + // It is assumed that the resulted query blob is aligned. virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t original_blob_size, bool force_copy = false) const; diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index d97cf57ca..515a94ef0 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -29,15 +29,14 @@ class PreprocessorInterface : public VecsimBaseObject { // down the preprocessors pipeline (such as in quantization preprocessor that compresses the // vector). virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t original_blob_size, unsigned char alignment) const = 0; + size_t input_blob_size, unsigned char alignment) const = 0; virtual void preprocessForStorage(const void *original_blob, void *&storage_blob, - size_t original_blob_size) const = 0; + size_t input_blob_size) const = 0; virtual void preprocessQuery(const void *original_blob, void *&query_blob, - size_t original_blob_size, unsigned char alignment) const = 0; - virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_bytes_count, + size_t input_blob_size, unsigned char alignment) const = 0; + virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_size, unsigned char alignment) const = 0; - virtual void preprocessStorageInPlace(void *original_blob, - size_t input_blob_bytes_count) const = 0; + virtual void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const = 0; }; template @@ -54,7 +53,7 @@ class CosinePreprocessor : public PreprocessorInterface { // already allocated and processed it. So, we process it inplace. If it's null, we need to // allocate memory for it and copy the original_blob to it. void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t original_blob_size, unsigned char alignment) const override { + size_t input_blob_size, unsigned char alignment) const override { // Case 1: Blobs are different (one might be null, or both are allocated and processed // separately). @@ -62,10 +61,10 @@ class CosinePreprocessor : public PreprocessorInterface { // If one of them is null, allocate memory for it and copy the original_blob to it. if (storage_blob == nullptr) { storage_blob = this->allocator->allocate(processed_bytes_count); - memcpy(storage_blob, original_blob, original_blob_size); + memcpy(storage_blob, original_blob, input_blob_size); } else if (query_blob == nullptr) { query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(query_blob, original_blob, original_blob_size); + memcpy(query_blob, original_blob, input_blob_size); } // Normalize both blobs. @@ -75,7 +74,7 @@ class CosinePreprocessor : public PreprocessorInterface { if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set // storage_blob to point to it. query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(query_blob, original_blob, original_blob_size); + memcpy(query_blob, original_blob, input_blob_size); storage_blob = query_blob; } // normalize one of them (since they point to the same memory). @@ -84,35 +83,35 @@ class CosinePreprocessor : public PreprocessorInterface { } void preprocessForStorage(const void *original_blob, void *&blob, - size_t original_blob_size) const override { + size_t input_blob_size) const override { if (blob == nullptr) { blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, original_blob_size); + memcpy(blob, original_blob, input_blob_size); } normalize_func(blob, this->dim); } - void preprocessQuery(const void *original_blob, void *&blob, size_t original_blob_size, + void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size, unsigned char alignment) const override { if (blob == nullptr) { blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, original_blob_size); + memcpy(blob, original_blob, input_blob_size); } normalize_func(blob, this->dim); } - void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { assert(blob); - // TODO: replace with a debug assert and add values of input_blob_bytes_count and + // TODO: replace with a debug assert and add values of input_blob_size and // processed_bytes_count - assert(input_blob_bytes_count == this->processed_bytes_count); + assert(input_blob_size == this->processed_bytes_count); normalize_func(blob, this->dim); } - void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override { + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { assert(blob); - assert(input_blob_bytes_count == this->processed_bytes_count); + assert(input_blob_size == this->processed_bytes_count); normalize_func(blob, this->dim); } diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 57b7e578e..99441666f 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -12,6 +12,7 @@ #include "VecSim/spaces/computer/preprocessor_container.h" #include "VecSim/spaces/computer/calculator.h" #include "unit_test_utils.h" +#include "tests_utils.h" class IndexCalculatorTest : public ::testing::Test {}; namespace dummyCalcultor { @@ -57,8 +58,8 @@ enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY }; template class DummyStoragePreprocessor : public PreprocessorInterface { public: - DummyStoragePreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int value_to_add_query = 0) + DummyStoragePreprocessor(std::shared_ptr allocator, + DataType value_to_add_storage, DataType value_to_add_query = 0) : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(value_to_add_query) { if (!value_to_add_query) @@ -94,8 +95,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface { } private: - int value_to_add_storage; - int value_to_add_query; + DataType value_to_add_storage; + DataType value_to_add_query; }; // Dummy query preprocessor @@ -191,6 +192,124 @@ class DummyMixedPreprocessor : public PreprocessorInterface { int value_to_add_storage; int value_to_add_query; }; + +// TODO: test increase allocation size ( we don't really need another pp class for this) +// A preprocessor that changes the allocation size of the blobs in the same manner. +// set excess bytes to (char)2 +template +class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { +private: + size_t processed_bytes_count; + static constexpr unsigned char excess_value = 2; + +public: + DummyChangeAllocSizePreprocessor(std::shared_ptr allocator, + size_t processed_bytes_count) + : PreprocessorInterface(allocator), processed_bytes_count(processed_bytes_count) {} + + static constexpr unsigned char getExcessValue() { return excess_value; } + + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t input_blob_size, unsigned char alignment) const override { + // if the blobs are equal, + if (storage_blob == query_blob) { + preprocessGeneral(original_blob, storage_blob, input_blob_size, alignment); + query_blob = storage_blob; + return; + } + // if the blobs are not equal + if (input_blob_size < processed_bytes_count) { + auto alloc_and_process = [&](void *&blob) { + auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + if (blob == nullptr) { + memcpy(new_blob, original_blob, input_blob_size); + } else { + // copy the blob to the new blob, and free the old one + memcpy(new_blob, blob, input_blob_size); + this->allocator->free_allocation(blob); + } + blob = new_blob; + memset((char *)blob + input_blob_size, excess_value, + processed_bytes_count - input_blob_size); + }; + + alloc_and_process(storage_blob); + alloc_and_process(query_blob); + } else { + auto alloc_and_process = [&](void *&blob) { + if (blob == nullptr) { + blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + memcpy(blob, original_blob, processed_bytes_count); + } else { + memset((char *)blob + processed_bytes_count, excess_value, + input_blob_size - processed_bytes_count); + } + }; + + alloc_and_process(storage_blob); + alloc_and_process(query_blob); + } + } + + void preprocessForStorage(const void *original_blob, void *&blob, + size_t input_blob_size) const override { + + this->preprocessGeneral(original_blob, blob, processed_bytes_count); + } + void preprocessQueryInPlace(void *blob, size_t input_blob_size, + unsigned char alignment) const override { + // only supported if the blob in already large enough + assert(input_blob_size >= processed_bytes_count); + // set excess bytes to 0 + memset((char *)blob + processed_bytes_count, excess_value, + input_blob_size - processed_bytes_count); + } + + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { + // only supported if the blob in already large enough + assert(input_blob_size >= processed_bytes_count); + // set excess bytes to 0 + memset((char *)blob + processed_bytes_count, excess_value, + input_blob_size - processed_bytes_count); + } + void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size, + unsigned char alignment) const override { + this->preprocessGeneral(original_blob, blob, processed_bytes_count, alignment); + } + +private: + void preprocessGeneral(const void *original_blob, void *&blob, size_t input_blob_size, + unsigned char alignment = 0) const { + // if the size of the input is not enough. + if (input_blob_size < processed_bytes_count) { + // calloc doesn't have an alignment version, so we need to allocate aligned memory and + // cset the excess bytes to 0. + auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + if (blob == nullptr) { + // copy thr original blob + memcpy(new_blob, original_blob, input_blob_size); + } else { + // copy the blob to the new blob + memcpy(new_blob, blob, input_blob_size); + this->allocator->free_allocation(blob); + } + blob = new_blob; + // set excess bytes to 0 + memset((char *)blob + input_blob_size, excess_value, + processed_bytes_count - input_blob_size); + } else { // input size is larger than output + if (blob == nullptr) { + blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + memcpy(blob, original_blob, processed_bytes_count); + } else { + // set excess bytes to 0 + memset((char *)blob + processed_bytes_count, excess_value, + input_blob_size - processed_bytes_count); + } + } + } + // TODO: change** the refernce** input_blob_size to processed_bytes_count +}; } // namespace dummyPreprocessors TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) { @@ -207,8 +326,6 @@ TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) { unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment; ASSERT_EQ(address_alignment, 0); } - - // The index computer is responsible for releasing the distance calculator. } template @@ -602,20 +719,151 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { } // The preprocessors should be released by the preprocessors container. } +TEST(PreprocessorsTest, decrease_size_STORAGE_then_cosine_no_size_change) {} +TEST(PreprocessorsTest, decrease_size_QUERY_then_cosine_no_size_change) {} -TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { - // add cosine pp that changes the original blob size - // add a pp that preprocesses the normalized blob (same size) - // add a pp that changes the storage_blob size, but not changing the query_blob size +TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 2; + constexpr size_t alignment = 5; + constexpr size_t elements = 8; + constexpr size_t decrease_amount = 2; + constexpr size_t new_elem_amount = elements - decrease_amount; + + // valgrind detects out of bound reads only if the considered memory is allocated on the heap, + // rather than on the stack. + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + test_utils::populate_float_vec(original_blob, elements); + constexpr size_t new_processed_bytes_count = + original_blob_size - decrease_amount * sizeof(float); + + // Processed blob expected output + float expected_processed_blob[elements] = {0}; + memcpy(expected_processed_blob, original_blob, new_processed_bytes_count); + // Only use half of the blob for normalization + VecSim_Normalize(expected_processed_blob, new_elem_amount, VecSimType_FLOAT32); + + // A pp that decreases the original blob size + auto decrease_size_preprocessor = new (allocator) + DummyChangeAllocSizePreprocessor(allocator, new_processed_bytes_count); + // A normalize pp + auto cosine_preprocessor = new (allocator) + CosinePreprocessor(allocator, new_elem_amount, new_processed_bytes_count); + // Creating multi preprocessors container + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(decrease_size_preprocessor); + multiPPContainer.addPreprocessor(cosine_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_EQ(storage_blob, query_blob); + ASSERT_NE(storage_blob, nullptr); + + // memory should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + + // They need to be allocated and processed + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_blob, new_elem_amount)); + } } +TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { - // add a pp that changes the storage_blob size, but not changing the query_blob size - // add a pp that preprocesses the normalized blob (same size) - // add cosine pp that changes the original blob size + constexpr size_t n_preprocessors = 2; + constexpr size_t alignment = 5; + constexpr size_t elements = 8; + + // valgrind detects out of bound reads only if the considered memory is allocated on the heap, + // rather than on the stack. + constexpr size_t original_blob_size = elements * sizeof(int8_t); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + int8_t *original_blob = static_cast(original_blob_alloc.get()); + test_utils::populate_int8_vec(original_blob, elements); + // size after normalization + constexpr size_t normalized_blob_bytes_count = original_blob_size + sizeof(float); + // size after increasing pp + constexpr size_t elements_addition = 3; + constexpr size_t final_blob_bytes_count = + normalized_blob_bytes_count + elements_addition * sizeof(unsigned char); + + // Processed blob expected output + int8_t expected_processed_blob[elements + sizeof(float) + 3] = {0}; + memcpy(expected_processed_blob, original_blob, original_blob_size); + // normalize the original blob + VecSim_Normalize(expected_processed_blob, elements, VecSimType_INT8); + // add the values of the pp that increases the blob size + unsigned char added_value = DummyChangeAllocSizePreprocessor::getExcessValue(); + for (size_t i = 0; i < elements_addition; i++) { + expected_processed_blob[elements + sizeof(float) + i] = added_value; + } + + // A normalize pp - will allocate the blob + sizeof(float) + auto cosine_preprocessor = new (allocator) + CosinePreprocessor(allocator, elements, normalized_blob_bytes_count); + // A pp that will increase the *normalized* blob size + auto increase_size_preprocessor = + new (allocator) DummyChangeAllocSizePreprocessor(allocator, final_blob_bytes_count); + // Creating multi preprocessors container + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(cosine_preprocessor); + multiPPContainer.addPreprocessor(increase_size_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_EQ(storage_blob, query_blob); + ASSERT_NE(storage_blob, nullptr); + + // memory should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + + // They need to be allocated and processed + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_blob, + final_blob_bytes_count)); + } } -TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { - // pp multi container where cosine is only needed for the query blob (not supported yet) +TEST(PreprocessorsTest, cosine_then_change_size) { + // cosine (not changing) + // pp that changes the blob size } + +TEST(PreprocessorsTest, cosine_change_then_pp_change) { + // cosine ( changing) + // pp that also changes the blob size +} + +// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { +// // add cosine pp that changes the original blob size +// // add a pp that preprocesses the normalized blob (same size) +// // add a pp that changes the storage_blob size, but not changing the query_blob size +// } + +// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { +// // add a pp that changes the storage_blob size, but not changing the query_blob size +// // add a pp that preprocesses the normalized blob (same size) +// // add cosine pp that changes the original blob size +// } + +// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { +// // pp multi container where cosine is only needed for the query blob (not supported yet) +// } From 3e15e762d8fa77434b8afab6115cc9a4d80ca037 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 12 May 2025 14:44:28 +0000 Subject: [PATCH 04/22] preprocessors now change the blob size --- .../components/components_factory.h | 3 +- .../computer/preprocessor_container.cpp | 30 ++++------ .../spaces/computer/preprocessor_container.h | 56 +++++++++---------- src/VecSim/spaces/computer/preprocessors.h | 23 +++++--- 4 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h index 23d4d93c7..066bd0bca 100644 --- a/src/VecSim/index_factories/components/components_factory.h +++ b/src/VecSim/index_factories/components/components_factory.h @@ -31,7 +31,7 @@ * * @details * If the metric is Cosine and the data type is integral, the processed bytes count may include - * additional space for normalization (currently commented out). If the vectors are already + * additional space for normalization. If the vectors are already * normalized (is_normalized == true), the metric is adjusted to Inner Product (IP) to skip * redundant normalization during preprocessing. */ @@ -42,7 +42,6 @@ PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric met // By default the processed blob size is the same as the original blob size. size_t processed_bytes_count = dim * sizeof(DataType); - // If the index metric is Cosine, and VecSimMetric pp_metric = metric; if (metric == VecSimMetric_Cosine) { // if metric is cosine and DataType is integral, the processed_bytes_count includes the diff --git a/src/VecSim/spaces/computer/preprocessor_container.cpp b/src/VecSim/spaces/computer/preprocessor_container.cpp index 9328a7c85..0ec44e36d 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.cpp +++ b/src/VecSim/spaces/computer/preprocessor_container.cpp @@ -8,46 +8,38 @@ */ #include "VecSim/spaces/computer/preprocessor_container.h" -/** - * -=========================== TODO ================= -original_blob_size should be a reference so it can be changed when changes in the pp chain. - - -*/ ProcessedBlobs PreprocessorsContainerAbstract::preprocess(const void *original_blob, - size_t original_blob_size) const { - return ProcessedBlobs(preprocessForStorage(original_blob, original_blob_size), - preprocessQuery(original_blob, original_blob_size)); + size_t input_blob_size) const { + return ProcessedBlobs(preprocessForStorage(original_blob, input_blob_size), + preprocessQuery(original_blob, input_blob_size)); } MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob, - size_t original_blob_size) const { + size_t input_blob_size) const { return wrapWithDummyDeleter(const_cast(original_blob)); } MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob, - size_t original_blob_size, + size_t input_blob_size, bool force_copy) const { - return maybeCopyToAlignedMem(original_blob, original_blob_size, force_copy); + return maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy); } void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob, - size_t input_blob_bytes_count) const {} + size_t input_blob_size) const {} void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob, - size_t input_blob_bytes_count) const { -} + size_t input_blob_size) const {} MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem( - const void *original_blob, size_t original_blob_size, bool force_copy) const { + const void *original_blob, size_t input_blob_size, bool force_copy) const { bool needs_copy = force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0)); if (needs_copy) { - auto aligned_mem = this->allocator->allocate_aligned(original_blob_size, this->alignment); - memcpy(aligned_mem, original_blob, original_blob_size); + auto aligned_mem = this->allocator->allocate_aligned(input_blob_size, this->alignment); + memcpy(aligned_mem, original_blob, input_blob_size); return this->wrapAllocated(aligned_mem); } diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index 80453ce19..cd2f0dec8 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -23,18 +23,18 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject { unsigned char alignment) : VecsimBaseObject(allocator), alignment(alignment) {} // It is assumed that the resulted query blob is aligned. - virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const; + virtual ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const; virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob, - size_t original_blob_size) const; + size_t input_blob_size) const; // It is assumed that the resulted query blob is aligned. virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob, - size_t original_blob_size, + size_t input_blob_size, bool force_copy = false) const; - virtual void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const; - virtual void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const; + virtual void preprocessQueryInPlace(void *blob, size_t input_blob_size) const; + virtual void preprocessStorageInPlace(void *blob, size_t input_blob_size) const; unsigned char getAlignment() const { return alignment; } @@ -43,7 +43,7 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject { // Allocate and copy the blob only if the original blob is not aligned. MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob, - size_t original_blob_size, + size_t input_blob_size, bool force_copy = false) const; MemoryUtils::unique_blob wrapAllocated(void *blob) const { @@ -82,17 +82,17 @@ class MultiPreprocessorsContainer : public PreprocessorsContainerAbstract { */ int addPreprocessor(PreprocessorInterface *preprocessor); - ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const override; + ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const override; MemoryUtils::unique_blob preprocessForStorage(const void *original_blob, - size_t original_blob_size) const override; + size_t input_blob_size) const override; - MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t original_blob_size, + MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t input_blob_size, bool force_copy = false) const override; - void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const override; + void preprocessQueryInPlace(void *blob, size_t input_blob_size) const override; - void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override; + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override; #ifdef BUILD_TESTS std::array getPreprocessors() const { @@ -157,12 +157,13 @@ int MultiPreprocessorsContainer::addPreprocessor( } template -ProcessedBlobs MultiPreprocessorsContainer::preprocess( - const void *original_blob, size_t original_blob_size) const { +ProcessedBlobs +MultiPreprocessorsContainer::preprocess(const void *original_blob, + size_t input_blob_size) const { // No preprocessors were added yet. if (preprocessors[0] == nullptr) { // query might need to be aligned - auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size); + auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size); return ProcessedBlobs( std::move(Base::wrapWithDummyDeleter(const_cast(original_blob))), std::move(query_ptr)); @@ -173,8 +174,7 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces for (auto pp : preprocessors) { if (!pp) break; - pp->preprocess(original_blob, storage_blob, query_blob, original_blob_size, - this->alignment); + pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment); } // At least one blob was allocated. @@ -192,7 +192,7 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces if (query_blob == nullptr) { // we processed only the storage // query might need to be aligned - auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size); + auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size); return ProcessedBlobs(std::move(this->wrapAllocated(storage_blob)), std::move(query_ptr)); } @@ -204,13 +204,13 @@ ProcessedBlobs MultiPreprocessorsContainer::preproces template MemoryUtils::unique_blob MultiPreprocessorsContainer::preprocessForStorage( - const void *original_blob, size_t original_blob_size) const { + const void *original_blob, size_t input_blob_size) const { void *storage_blob = nullptr; for (auto pp : preprocessors) { if (!pp) break; - pp->preprocessForStorage(original_blob, storage_blob, original_blob_size); + pp->preprocessForStorage(original_blob, storage_blob, input_blob_size); } return storage_blob ? std::move(this->wrapAllocated(storage_blob)) @@ -219,40 +219,40 @@ MultiPreprocessorsContainer::preprocessForStorage( template MemoryUtils::unique_blob MultiPreprocessorsContainer::preprocessQuery( - const void *original_blob, size_t original_blob_size, bool force_copy) const { + const void *original_blob, size_t input_blob_size, bool force_copy) const { void *query_blob = nullptr; for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessQuery(original_blob, query_blob, original_blob_size, this->alignment); + pp->preprocessQuery(original_blob, query_blob, input_blob_size, this->alignment); } - return query_blob ? std::move(this->wrapAllocated(query_blob)) - : std::move(this->maybeCopyToAlignedMem(original_blob, original_blob_size, - force_copy)); + return query_blob + ? std::move(this->wrapAllocated(query_blob)) + : std::move(this->maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy)); } template void MultiPreprocessorsContainer::preprocessQueryInPlace( - void *blob, size_t input_blob_bytes_count) const { + void *blob, size_t input_blob_size) const { for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessQueryInPlace(blob, input_blob_bytes_count, this->alignment); + pp->preprocessQueryInPlace(blob, input_blob_size, this->alignment); } } template void MultiPreprocessorsContainer::preprocessStorageInPlace( - void *blob, size_t input_blob_bytes_count) const { + void *blob, size_t input_blob_size) const { for (auto pp : preprocessors) { if (!pp) break; // modifies the memory in place - pp->preprocessStorageInPlace(blob, input_blob_bytes_count); + pp->preprocessStorageInPlace(blob, input_blob_size); } } diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 515a94ef0..b1bb4932e 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -28,12 +28,14 @@ class PreprocessorInterface : public VecsimBaseObject { // TODO: handle a dynamic processed_bytes_count, as the allocation size of the blob might change // down the preprocessors pipeline (such as in quantization preprocessor that compresses the // vector). + // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results + // are the same size. virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t input_blob_size, unsigned char alignment) const = 0; + size_t &input_blob_size, unsigned char alignment) const = 0; virtual void preprocessForStorage(const void *original_blob, void *&storage_blob, - size_t input_blob_size) const = 0; + size_t &input_blob_size) const = 0; virtual void preprocessQuery(const void *original_blob, void *&query_blob, - size_t input_blob_size, unsigned char alignment) const = 0; + size_t &input_blob_size, unsigned char alignment) const = 0; virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_size, unsigned char alignment) const = 0; virtual void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const = 0; @@ -53,8 +55,7 @@ class CosinePreprocessor : public PreprocessorInterface { // already allocated and processed it. So, we process it inplace. If it's null, we need to // allocate memory for it and copy the original_blob to it. void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t input_blob_size, unsigned char alignment) const override { - + size_t &input_blob_size, unsigned char alignment) const override { // Case 1: Blobs are different (one might be null, or both are allocated and processed // separately). if (storage_blob != query_blob) { @@ -80,31 +81,35 @@ class CosinePreprocessor : public PreprocessorInterface { // normalize one of them (since they point to the same memory). normalize_func(query_blob, this->dim); } + + input_blob_size = processed_bytes_count; } void preprocessForStorage(const void *original_blob, void *&blob, - size_t input_blob_size) const override { + size_t &input_blob_size) const override { if (blob == nullptr) { blob = this->allocator->allocate(processed_bytes_count); memcpy(blob, original_blob, input_blob_size); } normalize_func(blob, this->dim); + input_blob_size = processed_bytes_count; } - void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size, + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { if (blob == nullptr) { blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); memcpy(blob, original_blob, input_blob_size); } normalize_func(blob, this->dim); + input_blob_size = processed_bytes_count; } void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { assert(blob); - // TODO: replace with a debug assert and add values of input_blob_size and - // processed_bytes_count + // TODO: replace with a debug assert and log the exact values of input_blob_size and + // processed_bytes_count to improve observability assert(input_blob_size == this->processed_bytes_count); normalize_func(blob, this->dim); } From 1863722a77914b8bfaeb6dc1b95d8186cf3efcb8 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 12 May 2025 14:46:01 +0000 Subject: [PATCH 05/22] fix test --- tests/unit/test_components.cpp | 80 ++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 99441666f..a483e75fa 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -67,29 +67,29 @@ class DummyStoragePreprocessor : public PreprocessorInterface { } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { + size_t &input_blob_size, unsigned char alignment) const override { - this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); + this->preprocessForStorage(original_blob, storage_blob, input_blob_size); } void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { + size_t &input_blob_size) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate(input_blob_size); + memcpy(blob, original_blob, input_blob_size); } static_cast(blob)[0] += value_to_add_storage; } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override {} - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override { + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { assert(blob); static_cast(blob)[0] += value_to_add_storage; } - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { /* do nothing*/ } @@ -112,25 +112,25 @@ class DummyQueryPreprocessor : public PreprocessorInterface { } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { - this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment); + size_t &input_blob_size, unsigned char alignment) const override { + this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment); } void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { + size_t &input_blob_size) const override { /* do nothing*/ } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { static_cast(blob)[0] += value_to_add_query; } - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {} - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {} + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate_aligned(input_blob_size, alignment); + memcpy(blob, original_blob, input_blob_size); } static_cast(blob)[0] += value_to_add_query; } @@ -149,41 +149,41 @@ class DummyMixedPreprocessor : public PreprocessorInterface { : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(value_to_add_query) {} void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { + size_t &input_blob_size, unsigned char alignment) const override { // One blob was already allocated by a previous preprocessor(s) that process both blobs the // same. The blobs are pointing to the same memory, we need to allocate another memory slot // to split them. if ((storage_blob == query_blob) && (query_blob != nullptr)) { - storage_blob = this->allocator->allocate(processed_bytes_count); - memcpy(storage_blob, query_blob, processed_bytes_count); + storage_blob = this->allocator->allocate(input_blob_size); + memcpy(storage_blob, query_blob, input_blob_size); } // Either both are nullptr or they are pointing to different memory slots. Both cases are // handled by the designated functions. - this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); - this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment); + this->preprocessForStorage(original_blob, storage_blob, input_blob_size); + this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment); } void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { + size_t &input_blob_size) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate(input_blob_size); + memcpy(blob, original_blob, input_blob_size); } static_cast(blob)[0] += value_to_add_storage; } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override {} - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {} - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {} + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate_aligned(input_blob_size, alignment); + memcpy(blob, original_blob, input_blob_size); } static_cast(blob)[0] += value_to_add_query; } @@ -210,14 +210,16 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { static constexpr unsigned char getExcessValue() { return excess_value; } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t input_blob_size, unsigned char alignment) const override { + size_t &input_blob_size, unsigned char alignment) const override { // if the blobs are equal, if (storage_blob == query_blob) { preprocessGeneral(original_blob, storage_blob, input_blob_size, alignment); query_blob = storage_blob; return; } - // if the blobs are not equal + // The blobs are not equal + + // If the input blob size is not enough if (input_blob_size < processed_bytes_count) { auto alloc_and_process = [&](void *&blob) { auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); @@ -249,12 +251,15 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { alloc_and_process(storage_blob); alloc_and_process(query_blob); } + + // update the input blob size + input_blob_size = processed_bytes_count; } void preprocessForStorage(const void *original_blob, void *&blob, - size_t input_blob_size) const override { + size_t &input_blob_size) const override { - this->preprocessGeneral(original_blob, blob, processed_bytes_count); + this->preprocessGeneral(original_blob, blob, input_blob_size); } void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { @@ -272,13 +277,13 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { memset((char *)blob + processed_bytes_count, excess_value, input_blob_size - processed_bytes_count); } - void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size, + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { - this->preprocessGeneral(original_blob, blob, processed_bytes_count, alignment); + this->preprocessGeneral(original_blob, blob, input_blob_size, alignment); } private: - void preprocessGeneral(const void *original_blob, void *&blob, size_t input_blob_size, + void preprocessGeneral(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment = 0) const { // if the size of the input is not enough. if (input_blob_size < processed_bytes_count) { @@ -307,8 +312,9 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { input_blob_size - processed_bytes_count); } } + // update the input blob size + input_blob_size = processed_bytes_count; } - // TODO: change** the refernce** input_blob_size to processed_bytes_count }; } // namespace dummyPreprocessors From 55837baadd00c82a013421d12fdadb80c85a61a7 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 12 May 2025 14:47:21 +0000 Subject: [PATCH 06/22] fix tiered test --- tests/unit/test_hnsw_tiered.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index a6540dba5..0b7157c0c 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -4205,52 +4205,52 @@ class PreprocessorDoubleValue : public PreprocessorInterface { PreprocessorDoubleValue(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim) {} void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t processed_bytes_count, unsigned char alignment) const override { + size_t &input_blob_size, unsigned char alignment) const override { // One blob was already allocated by a previous preprocessor(s) that process both blobs the // same. The blobs are pointing to the same memory, we need to allocate another memory slot // to split them. if ((storage_blob == query_blob) && (query_blob != nullptr)) { - storage_blob = this->allocator->allocate(processed_bytes_count); - memcpy(storage_blob, query_blob, processed_bytes_count); + storage_blob = this->allocator->allocate(input_blob_size); + memcpy(storage_blob, query_blob, input_blob_size); } // Either both are nullptr or they are pointing to different memory slots. Both cases are // handled by the designated functions. - this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count); - this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment); + this->preprocessForStorage(original_blob, storage_blob, input_blob_size); + this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment); } void preprocessForStorage(const void *original_blob, void *&blob, - size_t processed_bytes_count) const override { + size_t &input_blob_size) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate(input_blob_size); + memcpy(blob, original_blob, input_blob_size); } for (size_t i = 0; i < dim; i++) { static_cast(blob)[i] *= 2; } } - void preprocessQueryInPlace(void *blob, size_t processed_bytes_count, + void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { for (size_t i = 0; i < dim; i++) { static_cast(blob)[i] *= 2; } } - void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override { + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { for (size_t i = 0; i < dim; i++) { static_cast(blob)[i] *= 2; } } - void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count, + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { // If the blob was not allocated yet, allocate it. if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, processed_bytes_count); + blob = this->allocator->allocate_aligned(input_blob_size, alignment); + memcpy(blob, original_blob, input_blob_size); } for (size_t i = 0; i < dim; i++) { static_cast(blob)[i] *= 2; From b1699adf4e27ffebbe103c785d2d3a6d0fb96428 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sat, 17 May 2025 04:50:17 +0000 Subject: [PATCH 07/22] add assert storage_blob == nullptr || input_blob_size == processed_bytes_count add valgrind macro value_to_add of DummyQueryPreprocessor and DummyMixedPreprocessor is now DataType instead of int --- Makefile | 1 + src/VecSim/spaces/computer/preprocessors.h | 22 ++- tests/unit/CMakeLists.txt | 6 + tests/unit/test_components.cpp | 219 ++++++++++++++++----- 4 files changed, 192 insertions(+), 56 deletions(-) diff --git a/Makefile b/Makefile index 450194de8..3fe149f0d 100644 --- a/Makefile +++ b/Makefile @@ -177,6 +177,7 @@ ifeq ($(VALGRIND),1) _CTEST_ARGS += \ -T memcheck \ --overwrite MemoryCheckCommandOptions="--leak-check=full --fair-sched=yes --error-exitcode=255" +CMAKE_FLAGS += -DUSE_VALGRIND=ON endif unit_test: diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index b1bb4932e..c9271c088 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -44,18 +44,25 @@ class PreprocessorInterface : public VecsimBaseObject { template class CosinePreprocessor : public PreprocessorInterface { public: - // This preprocessor assumes storage_blob and query_blob - // both are preprocessed in the same way, and yield a blob in the same size. + // This preprocessor requires that storage_blob and query_blob have identical memory sizes + // both before processing (as input) and after preprocessing completes. CosinePreprocessor(std::shared_ptr allocator, size_t dim, size_t processed_bytes_count) : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc()), dim(dim), processed_bytes_count(processed_bytes_count) {} - // If a blob (storage_blob or query_blob) is not nullptr, it means a previous preprocessor - // already allocated and processed it. So, we process it inplace. If it's null, we need to - // allocate memory for it and copy the original_blob to it. void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { + // This assert verifies that if a blob was allocated by a previous preprocessor, its + // size matches our expected processed size. Therefore, it is safe to skip re-allocation and + // process it inplace. Supporting dynamic resizing would require additional size checks (if + // statements) and memory management logic, which could impact performance. Currently, no + // code path requires this capability. If resizing becomes necessary in the future, remove + // the assertions and implement appropriate allocation handling with performance + // considerations. + assert(storage_blob == nullptr || input_blob_size == processed_bytes_count); + assert(query_blob == nullptr || input_blob_size == processed_bytes_count); + // Case 1: Blobs are different (one might be null, or both are allocated and processed // separately). if (storage_blob != query_blob) { @@ -87,6 +94,9 @@ class CosinePreprocessor : public PreprocessorInterface { void preprocessForStorage(const void *original_blob, void *&blob, size_t &input_blob_size) const override { + // see assert docs in preprocess + assert(blob == nullptr || input_blob_size == processed_bytes_count); + if (blob == nullptr) { blob = this->allocator->allocate(processed_bytes_count); memcpy(blob, original_blob, input_blob_size); @@ -97,6 +107,8 @@ class CosinePreprocessor : public PreprocessorInterface { void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, unsigned char alignment) const override { + // see assert docs in preprocess + assert(blob == nullptr || input_blob_size == processed_bytes_count); if (blob == nullptr) { blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); memcpy(blob, original_blob, input_blob_size); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 8767e4190..dde8874f4 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -28,6 +28,12 @@ if(FP64_TESTS) add_definitions(-DFP64_TESTS) endif() +option(USE_VALGRIND "Building for Valgrind" OFF) +if(USE_VALGRIND) + add_definitions(-DRUNNING_ON_VALGRIND) + message(STATUS "Building with RUNNING_ON_VALGRIND definition for Valgrind compatibility") +endif() + if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)") include(${root}/cmake/aarch64InstructionFlags.cmake) else() diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index a483e75fa..f74a55846 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -103,8 +103,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface { template class DummyQueryPreprocessor : public PreprocessorInterface { public: - DummyQueryPreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int _value_to_add_query = 0) + DummyQueryPreprocessor(std::shared_ptr allocator, + DataType value_to_add_storage, DataType _value_to_add_query = 0) : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(_value_to_add_query) { if (!_value_to_add_query) @@ -136,16 +136,16 @@ class DummyQueryPreprocessor : public PreprocessorInterface { } private: - int value_to_add_storage; - int value_to_add_query; + DataType value_to_add_storage; + DataType value_to_add_query; }; // Dummy mixed preprocessor (precesses the blobs differently) template class DummyMixedPreprocessor : public PreprocessorInterface { public: - DummyMixedPreprocessor(std::shared_ptr allocator, int value_to_add_storage, - int value_to_add_query) + DummyMixedPreprocessor(std::shared_ptr allocator, + DataType value_to_add_storage, DataType value_to_add_query) : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(value_to_add_query) {} void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, @@ -189,8 +189,8 @@ class DummyMixedPreprocessor : public PreprocessorInterface { } private: - int value_to_add_storage; - int value_to_add_query; + DataType value_to_add_storage; + DataType value_to_add_query; }; // TODO: test increase allocation size ( we don't really need another pp class for this) @@ -391,7 +391,7 @@ void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) { int initial_value = 1; int value_to_add = 7; const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); + size_t original_blob_size = sizeof(original_blob); // Test computer with multiple preprocessors of the same type. auto multiPPContainer = @@ -399,7 +399,7 @@ void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) { auto verify_preprocess = [&](int expected_processed_value) { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); + multiPPContainer.preprocess(original_blob, original_blob_size); // Original blob should not be changed ASSERT_EQ(original_blob[0], initial_value); @@ -455,7 +455,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() { int value_to_add_storage = 7; int value_to_add_query = 2; const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); + size_t original_blob_size = sizeof(original_blob); // Test multiple preprocessors of the same type. auto multiPPContainer = @@ -472,7 +472,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() { // scope this section so the blobs are released before the allocator. { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); + multiPPContainer.preprocess(original_blob, original_blob_size); // Original blob should not be changed ASSERT_EQ(original_blob[0], initial_value); @@ -497,7 +497,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() { ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0); { ProcessedBlobs mixed_processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); + multiPPContainer.preprocess(original_blob, original_blob_size); const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob(); const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob(); @@ -534,14 +534,14 @@ void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) { int initial_value = 1; int value_to_add = 7; const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value}; - size_t processed_bytes_count = sizeof(original_blob); + size_t original_blob_size = sizeof(original_blob); auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); auto verify_preprocess = [&](int expected_processed_value) { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, processed_bytes_count); + multiPPContainer.preprocess(original_blob, original_blob_size); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); @@ -594,19 +594,18 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { float value_to_add_storage = 7.0f; float value_to_add_query = 2.0f; const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; - // TODo: change this test so that original_blob_size != processed_bytes_count - constexpr size_t processed_bytes_count = sizeof(original_blob); + constexpr size_t original_blob_size = sizeof(original_blob); auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); + MultiPreprocessorsContainer(allocator, alignment); // adding cosine preprocessor auto cosine_preprocessor = - new (allocator) CosinePreprocessor(allocator, dim, processed_bytes_count); + new (allocator) CosinePreprocessor(allocator, dim, original_blob_size); multiPPContainer.addPreprocessor(cosine_preprocessor); { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + multiPPContainer.preprocess(original_blob, original_blob_size); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); // blobs should point to the same memory slot @@ -626,7 +625,7 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { multiPPContainer.addPreprocessor(mixed_preprocessor); { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + multiPPContainer.preprocess(original_blob, original_blob_size); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); // blobs should point to a different memory slot @@ -649,6 +648,7 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) { // The preprocessors should be released by the preprocessors container. } +// Cosine pp receives different allocation for the storage and query blobs. TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { using namespace dummyPreprocessors; std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); @@ -657,23 +657,28 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { constexpr size_t dim = 4; unsigned char alignment = 5; - float initial_value = 1.0f; - float normalized_value = 0.5f; - float value_to_add_storage = 7.0f; - float value_to_add_query = 2.0f; - const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; - constexpr size_t processed_bytes_count = sizeof(original_blob); - + // In this test the first preprocessor allocates the memory for both blobs, according to the + // size passed by the pp container. The second preprocessor expects that if the blobs are + // already allocated, their size matches its processed_bytes_count. Hence, the blob size should + // be sufficient for the cosine preprocessing. + constexpr int8_t normalized_blob_bytes_count = dim * sizeof(int8_t) + sizeof(float); + auto blob_alloc = allocator->allocate_unique(normalized_blob_bytes_count); + int8_t *original_blob = static_cast(blob_alloc.get()); + test_utils::populate_int8_vec(original_blob, dim); + + // Processing params + int8_t value_to_add_storage = 7; + int8_t value_to_add_query = 4; // Creating multi preprocessors container auto mixed_preprocessor = new (allocator) - DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); + DummyMixedPreprocessor(allocator, value_to_add_storage, value_to_add_query); auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); + MultiPreprocessorsContainer(allocator, alignment); multiPPContainer.addPreprocessor(mixed_preprocessor); { ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + multiPPContainer.preprocess(original_blob, normalized_blob_bytes_count); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); // blobs should point to a different memory slot @@ -685,22 +690,46 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; ASSERT_EQ(address_alignment, 0); - // They need to be processed by both processors. - ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage); - ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query); + // Both blobs were processed. + ASSERT_EQ(((const int8_t *)storage_blob)[0], original_blob[0] + value_to_add_storage); + ASSERT_EQ(((const int8_t *)query_blob)[0], original_blob[0] + value_to_add_query); // the original blob should not change ASSERT_NE(storage_blob, original_blob); ASSERT_NE(query_blob, original_blob); } - // adding cosine preprocessor + // Processed blob expected output + auto expected_processed_blob = [&](int8_t *blob, int8_t value_to_add) { + memcpy(blob, original_blob, normalized_blob_bytes_count); + blob[0] += value_to_add; + VecSim_Normalize(blob, dim, VecSimType_INT8); + }; + + int8_t expected_processed_storage[normalized_blob_bytes_count] = {0}; + expected_processed_blob(expected_processed_storage, value_to_add_storage); + int8_t expected_processed_query[normalized_blob_bytes_count] = {0}; + expected_processed_blob(expected_processed_query, value_to_add_query); + + // normalize the original blob auto cosine_preprocessor = - new (allocator) CosinePreprocessor(allocator, dim, processed_bytes_count); + new (allocator) CosinePreprocessor(allocator, dim, normalized_blob_bytes_count); multiPPContainer.addPreprocessor(cosine_preprocessor); { + // An assertion should be raised by the cosine preprocessor for unmatching blob sizes. + // in valgrind the test continues, but the assertion appears in its log looking like an + // error, so to avoid confusion we skip this line in valgrind. +#ifndef RUNNING_ON_VALGRIND + EXPECT_EXIT( + { + ProcessedBlobs processed_blobs = multiPPContainer.preprocess( + original_blob, normalized_blob_bytes_count - sizeof(float)); + }, + testing::KilledBySignal(SIGABRT), "input_blob_size == processed_bytes_count"); +#endif + // Use the correct size ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + multiPPContainer.preprocess(original_blob, normalized_blob_bytes_count); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); // blobs should point to a different memory slot @@ -711,23 +740,106 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { // They need to be allocated and processed ASSERT_NE(storage_blob, nullptr); ASSERT_NE(query_blob, nullptr); - float expected_processed_storage[dim] = {initial_value + value_to_add_storage, - initial_value, initial_value, initial_value}; - float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value, - initial_value, initial_value}; - VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32); - VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32); - ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]); - ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]); // the original blob should not change ASSERT_NE(storage_blob, original_blob); ASSERT_NE(query_blob, original_blob); + + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_storage, + normalized_blob_bytes_count)); + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(query_blob), + expected_processed_query, + normalized_blob_bytes_count)); } // The preprocessors should be released by the preprocessors container. } -TEST(PreprocessorsTest, decrease_size_STORAGE_then_cosine_no_size_change) {} -TEST(PreprocessorsTest, decrease_size_QUERY_then_cosine_no_size_change) {} +template +void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) { + using namespace dummyPreprocessors; + + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t n_preprocessors = 2; + constexpr size_t dim = 4; + unsigned char alignment = 5; + + float original_blob[dim] = {0}; + size_t original_blob_size = dim * sizeof(float); + test_utils::populate_float_vec(original_blob, dim); + + // Processing params + float value_to_add_storage = 0; + float value_to_add_query = 0; + if (MODE == STORAGE_ONLY) { + value_to_add_storage = 7; + } else if (MODE == QUERY_ONLY) { + value_to_add_query = 4; + } + + // Processed blob expected output + auto expected_processed_blob = [&](float *blob, float value_to_add) { + memcpy(blob, original_blob, original_blob_size); + blob[0] += value_to_add; + VecSim_Normalize(blob, dim, VecSimType_FLOAT32); + }; + + float expected_processed_storage[original_blob_size] = {0}; + expected_processed_blob(expected_processed_storage, value_to_add_storage); + float expected_processed_query[original_blob_size] = {0}; + expected_processed_blob(expected_processed_query, value_to_add_query); + + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + // will allocate either the storage or the query blob, depending on the preprocessor type. + // TODO : can we pass just value_to_add? + auto asymmetric_preprocessor = + new (allocator) PreprocessorType(allocator, value_to_add_storage, value_to_add_query); + auto cosine_preprocessor = + new (allocator) CosinePreprocessor(allocator, dim, original_blob_size); + + multiPPContainer.addPreprocessor(asymmetric_preprocessor); + multiPPContainer.addPreprocessor(cosine_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to a different memory slot + ASSERT_NE(storage_blob, query_blob); + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + + // query blob should be aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); + + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_storage, dim)); + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(query_blob), + expected_processed_query, dim)); + } +} +// The first preprocessor in the chain allocates only the storage blob, +// and the responsibility of allocating the query blob is delegated to the next preprocessor +// in the chain (cosine preprocessor in this case). +TEST(PreprocessorsTest, STORAGE_then_cosine_no_size_change) { + using namespace dummyPreprocessors; + EXPECT_NO_FATAL_FAILURE( + AsymmetricPPThenCosine>(pp_mode::STORAGE_ONLY)); +} + +TEST(PreprocessorsTest, QUERY_then_cosine_no_size_change) { + using namespace dummyPreprocessors; + EXPECT_NO_FATAL_FAILURE( + AsymmetricPPThenCosine>(pp_mode::QUERY_ONLY)); +} + +// A test where the value of input_blob_size is modified by the first pp. +// The cosine pp processed_bytes_count should be set at initialization with the *modified* value, +// otherwise, an assertion will be raised by it for an allocated blob that is smaller than the +// expected size. TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) { using namespace dummyPreprocessors; std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); @@ -761,7 +873,7 @@ TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) { CosinePreprocessor(allocator, new_elem_amount, new_processed_bytes_count); // Creating multi preprocessors container auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); + MultiPreprocessorsContainer(allocator, alignment); multiPPContainer.addPreprocessor(decrease_size_preprocessor); multiPPContainer.addPreprocessor(cosine_preprocessor); @@ -784,13 +896,17 @@ TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) { } } +// In this test the original blob size is smaller than the processed_bytes_count of the +// cosine preprocessor. Before the bug fix, the cosine pp would try to copy processed_bytes_count +// bytes of the original blob to the allocated memory, which would cause an out of bound read, +// that should be detected by valgrind. TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { using namespace dummyPreprocessors; std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); constexpr size_t n_preprocessors = 2; constexpr size_t alignment = 5; - constexpr size_t elements = 8; + constexpr size_t elements = 7; // valgrind detects out of bound reads only if the considered memory is allocated on the heap, // rather than on the stack. @@ -806,7 +922,7 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { normalized_blob_bytes_count + elements_addition * sizeof(unsigned char); // Processed blob expected output - int8_t expected_processed_blob[elements + sizeof(float) + 3] = {0}; + int8_t expected_processed_blob[elements + sizeof(float) + elements_addition] = {0}; memcpy(expected_processed_blob, original_blob, original_blob_size); // normalize the original blob VecSim_Normalize(expected_processed_blob, elements, VecSimType_INT8); @@ -829,8 +945,9 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { multiPPContainer.addPreprocessor(increase_size_preprocessor); { + // Note: we pass the original blob size to detect out of bound reads in the cosine pp. ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, sizeof(original_blob)); + multiPPContainer.preprocess(original_blob, original_blob_size); const void *storage_blob = processed_blobs.getStorageBlob(); const void *query_blob = processed_blobs.getQueryBlob(); // blobs should point to the same memory slot From 6dc543df53daf8dba8bef5d22e102fc4222c8a76 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sat, 17 May 2025 14:35:44 +0000 Subject: [PATCH 08/22] enable assert only in debug --- tests/unit/test_components.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index f74a55846..6314cfb79 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -719,7 +719,7 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { // An assertion should be raised by the cosine preprocessor for unmatching blob sizes. // in valgrind the test continues, but the assertion appears in its log looking like an // error, so to avoid confusion we skip this line in valgrind. -#ifndef RUNNING_ON_VALGRIND +#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) EXPECT_EXIT( { ProcessedBlobs processed_blobs = multiPPContainer.preprocess( From 3e673b7b6eb1a3a5994eb144c85e91a35b01a8bb Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sat, 17 May 2025 14:39:15 +0000 Subject: [PATCH 09/22] use constexpr for blob size --- tests/unit/test_components.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 6314cfb79..ce7a0ddd7 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -765,7 +765,7 @@ void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) { unsigned char alignment = 5; float original_blob[dim] = {0}; - size_t original_blob_size = dim * sizeof(float); + constexpr size_t original_blob_size = dim * sizeof(float); test_utils::populate_float_vec(original_blob, dim); // Processing params From 8967d40b237ce4a4c2afb932d60f6c161a8a6119 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 18 May 2025 11:42:31 +0000 Subject: [PATCH 10/22] small docs changes --- src/VecSim/spaces/computer/preprocessors.h | 9 --------- tests/unit/CMakeLists.txt | 2 +- tests/unit/test_components.cpp | 1 - 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index c9271c088..88c34506e 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -17,17 +17,10 @@ #include "VecSim/spaces/spaces.h" #include "VecSim/memory/memory_utils.h" -// TODO: Handle processed_bytes_count that might change down the preprocessors pipeline. -// The preprocess function calls a pipeline of preprocessors, one of which can be a quantization -// preprocessor. In such cases, the quantization preprocessor compresses the vector, resulting in a -// change in the allocation size. class PreprocessorInterface : public VecsimBaseObject { public: PreprocessorInterface(std::shared_ptr allocator) : VecsimBaseObject(allocator) {} - // TODO: handle a dynamic processed_bytes_count, as the allocation size of the blob might change - // down the preprocessors pipeline (such as in quantization preprocessor that compresses the - // vector). // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results // are the same size. virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, @@ -120,8 +113,6 @@ class CosinePreprocessor : public PreprocessorInterface { void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { assert(blob); - // TODO: replace with a debug assert and log the exact values of input_blob_size and - // processed_bytes_count to improve observability assert(input_blob_size == this->processed_bytes_count); normalize_func(blob, this->dim); } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index dde8874f4..515ab2ae1 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -31,7 +31,7 @@ endif() option(USE_VALGRIND "Building for Valgrind" OFF) if(USE_VALGRIND) add_definitions(-DRUNNING_ON_VALGRIND) - message(STATUS "Building with RUNNING_ON_VALGRIND definition for Valgrind compatibility") + message(STATUS "Building with RUNNING_ON_VALGRIND") endif() if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)") diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index ce7a0ddd7..6c596fb08 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -193,7 +193,6 @@ class DummyMixedPreprocessor : public PreprocessorInterface { DataType value_to_add_query; }; -// TODO: test increase allocation size ( we don't really need another pp class for this) // A preprocessor that changes the allocation size of the blobs in the same manner. // set excess bytes to (char)2 template From 674b136b4e8c814d400993c5abbe964b76cbc453 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 27 May 2025 07:47:34 +0000 Subject: [PATCH 11/22] review fixes --- tests/unit/test_components.cpp | 57 +++++++++------------------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 6c596fb08..5fefe006a 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -218,9 +218,9 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { } // The blobs are not equal - // If the input blob size is not enough - if (input_blob_size < processed_bytes_count) { - auto alloc_and_process = [&](void *&blob) { + auto alloc_and_process = [&](void *&blob) { + // If the input blob size is not enough + if (input_blob_size < processed_bytes_count) { auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); if (blob == nullptr) { memcpy(new_blob, original_blob, input_blob_size); @@ -232,12 +232,7 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { blob = new_blob; memset((char *)blob + input_blob_size, excess_value, processed_bytes_count - input_blob_size); - }; - - alloc_and_process(storage_blob); - alloc_and_process(query_blob); - } else { - auto alloc_and_process = [&](void *&blob) { + } else { if (blob == nullptr) { blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); memcpy(blob, original_blob, processed_bytes_count); @@ -245,11 +240,11 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { memset((char *)blob + processed_bytes_count, excess_value, input_blob_size - processed_bytes_count); } - }; + } + }; - alloc_and_process(storage_blob); - alloc_and_process(query_blob); - } + alloc_and_process(storage_blob); + alloc_and_process(query_blob); // update the input blob size input_blob_size = processed_bytes_count; @@ -690,8 +685,10 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { ASSERT_EQ(address_alignment, 0); // Both blobs were processed. - ASSERT_EQ(((const int8_t *)storage_blob)[0], original_blob[0] + value_to_add_storage); - ASSERT_EQ(((const int8_t *)query_blob)[0], original_blob[0] + value_to_add_query); + ASSERT_EQ((static_cast(storage_blob))[0], + original_blob[0] + value_to_add_storage); + ASSERT_EQ((static_cast(query_blob))[0], + original_blob[0] + value_to_add_query); // the original blob should not change ASSERT_NE(storage_blob, original_blob); @@ -783,9 +780,9 @@ void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) { VecSim_Normalize(blob, dim, VecSimType_FLOAT32); }; - float expected_processed_storage[original_blob_size] = {0}; + float expected_processed_storage[dim] = {0}; expected_processed_blob(expected_processed_storage, value_to_add_storage); - float expected_processed_query[original_blob_size] = {0}; + float expected_processed_query[dim] = {0}; expected_processed_blob(expected_processed_query, value_to_add_query); auto multiPPContainer = @@ -963,29 +960,3 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { final_blob_bytes_count)); } } - -TEST(PreprocessorsTest, cosine_then_change_size) { - // cosine (not changing) - // pp that changes the blob size -} - -TEST(PreprocessorsTest, cosine_change_then_pp_change) { - // cosine ( changing) - // pp that also changes the blob size -} - -// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { -// // add cosine pp that changes the original blob size -// // add a pp that preprocesses the normalized blob (same size) -// // add a pp that changes the storage_blob size, but not changing the query_blob size -// } - -// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { -// // add a pp that changes the storage_blob size, but not changing the query_blob size -// // add a pp that preprocesses the normalized blob (same size) -// // add cosine pp that changes the original blob size -// } - -// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) { -// // pp multi container where cosine is only needed for the query blob (not supported yet) -// } From d529f5ebbe9cf1908b7adf2902cc9ccef773f495 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 27 May 2025 07:59:06 +0000 Subject: [PATCH 12/22] =?UTF-8?q?=D7=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_components.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 5fefe006a..f6eabd3c8 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -925,7 +925,7 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { // add the values of the pp that increases the blob size unsigned char added_value = DummyChangeAllocSizePreprocessor::getExcessValue(); for (size_t i = 0; i < elements_addition; i++) { - expected_processed_blob[elements + sizeof(float) + i] = added_value; + expected_processed_blob[elements + sizeof(float) + i] = static_cast(added_value); } // A normalize pp - will allocate the blob + sizeof(float) From af11142218c3e0ee32dd58e46d02feb16a534457 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Wed, 28 May 2025 10:28:01 +0300 Subject: [PATCH 13/22] notes and changes --- src/VecSim/spaces/computer/preprocessors.h | 190 ++++++++++++++++++++- 1 file changed, 189 insertions(+), 1 deletion(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 88c34506e..2b46e09d7 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -44,8 +44,9 @@ class CosinePreprocessor : public PreprocessorInterface { : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc()), dim(dim), processed_bytes_count(processed_bytes_count) {} + // TODO: add storage_blob_size and query_blob_size parameters void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &input_blob_size, unsigned char alignment) const override { + size_t &storage_blob_size, unsigned char alignment) const override { // This assert verifies that if a blob was allocated by a previous preprocessor, its // size matches our expected processed size. Therefore, it is safe to skip re-allocation and // process it inplace. Supporting dynamic resizing would require additional size checks (if @@ -128,3 +129,190 @@ class CosinePreprocessor : public PreprocessorInterface { const size_t dim; const size_t processed_bytes_count; }; + + +// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision of uint8_t. +// The quantization is done by finding the minimum and maximum values of the input vector, and then +// scaling the values to fit in the range of [0, 255]. The quantized values are then stored in a uint8_t array. +// [Quantized values, min, delta] +// Quantized Blob size = dim_elements * sizeof(int8) + 2 * sizeof(float) +// delta = (max_val - min_val) / 255.0f +// quantized_v[i] = (v[i] - min_val) / delta +// preprocessForStorage: +// if null: +// - We are not reallocing because it will be released after the query. +// Allocate quantized blob size +// 3. Compute (min, delta) and quantize to the quantized blob or in place. +// preprocessQuery: No-op – queries arrive as float32 and remain uncompressed + + +// preprocess -> +// if storage_blob == null || storage_blob == query_blob: +// allocate storage blob with storage_blob_size +// edge case: +// Check if the input size is not big enough and if not, reallocate it, and update the size. -add this scenario to tests +// quantize it the storage_blob + + +// add class member const size_t storage_bytes_count, and calculate it as follows: +// storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float); + + +template +class QuantPreprocessor : public PreprocessorInterface { +public: + QuantPreprocessor(std::shared_ptr allocator, size_t dim, + size_t processed_bytes_count) + : PreprocessorInterface(allocator), dim(dim), processed_bytes_count(processed_bytes_count) {} + + // Helper function to perform quantization + void quantize(const float* input, uint8_t* output, float& min_val, float& delta) const { + // Find min and max values + min_val = input[0]; + float max_val = input[0]; + for (size_t i = 1; i < dim; i++) { + min_val = std::min(min_val, input[i]); + max_val = std::max(max_val, input[i]); + } + + // Calculate scaling factor + delta = (max_val - min_val) / 255.0f; + + // Quantize values + if (delta > 0) { + for (size_t i = 0; i < dim; i++) { + output[i] = static_cast((input[i] - min_val) / delta); + } + } else { + // Handle case where all values are the same + memset(output, 0, dim); + } + } + + // Calculate the norm of the dequantized vector + float calculateDequantizedNorm(const uint8_t* quantized, const float min_val, const float delta) const { + float norm = 0.0f; + for (size_t i = 0; i < dim; i++) { + float dequantized = min_val + quantized[i] * delta; + norm += dequantized * dequantized; + } + return norm > 0 ? 1.0f / std::sqrt(norm) : 0.0f; + } + + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &input_blob_size, unsigned char alignment) const override { + // This assert verifies that if a blob was allocated by a previous preprocessor, its + // size matches our expected processed size. Therefore, it is safe to skip re-allocation and + // process it inplace. Supporting dynamic resizing would require additional size checks (if + // statements) and memory management logic, which could impact performance. Currently, no + // code path requires this capability. If resizing becomes necessary in the future, remove + // the assertions and implement appropriate allocation handling with performance + // considerations. + assert(storage_blob == nullptr || input_blob_size == processed_bytes_count); + assert(query_blob == nullptr || input_blob_size == processed_bytes_count); + // Case 1: Blobs are different (one might be null, or both are allocated and processed + // separately). + if (storage_blob != query_blob) { + // If one of them is null, allocate memory for it and copy the original_blob to it. + if (storage_blob == nullptr) { + storage_blob = this->allocator->allocate(processed_bytes_count); + memcpy(storage_blob, original_blob, input_blob_size); + } else if (query_blob == nullptr) { + query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + memcpy(query_blob, original_blob, input_blob_size); + } + } else { // Case 2: Blobs are the same (either both are null or processed in the same way). + if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set + // storage_blob to point to it. + query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + memcpy(query_blob, original_blob, input_blob_size); + storage_blob = query_blob; + } + } + input_blob_size = processed_bytes_count; + } + + void preprocessForStorage(const void *original_blob, void *&blob, + size_t &input_blob_size) const override { + // see assert docs in preprocess + assert(blob == nullptr || input_blob_size == processed_bytes_count); + + if (blob == nullptr) { + blob = this->allocator->allocate(processed_bytes_count); + memcpy(blob, original_blob, input_blob_size); + } + + // Only quantize if input is float32 and we're quantizing to uint8_t + if constexpr (std::is_same_v) { + // Cast to appropriate types + const float* input = static_cast(original_blob); + uint8_t* quantized = static_cast(blob); + + // Reserve space for metadata at the end of the blob + // [quantized values, min_val, delta, inverted_norm] + float* metadata = reinterpret_cast(quantized + dim); + + // Perform quantization + float min_val, delta; + quantize(input, quantized, min_val, delta); + + // Store min_val and delta in the metadata + metadata[0] = min_val; + metadata[1] = delta; + + // Calculate and store the inverted norm for cosine similarity + metadata[2] = calculateDequantizedNorm(quantized, min_val, delta); + } + + input_blob_size = processed_bytes_count; + } + + void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, + unsigned char alignment) const override { + // see assert docs in preprocess + assert(blob == nullptr || input_blob_size == processed_bytes_count); + if (blob == nullptr) { + blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); + memcpy(blob, original_blob, input_blob_size); + } + input_blob_size = processed_bytes_count; + } + + void preprocessQueryInPlace(void *blob, size_t input_blob_size, + unsigned char alignment) const override { + assert(blob); + assert(input_blob_size == this->processed_bytes_count); + } + + void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { + assert(blob); + assert(input_blob_size == this->processed_bytes_count); + + if constexpr (std::is_same_v) { + // Cast to appropriate types + float* input = static_cast(blob); + uint8_t* quantized = static_cast(blob); + + // Create temporary copy of input data + std::vector temp_input(input, input + dim); + + // Reserve space for metadata at the end of the blob + float* metadata = reinterpret_cast(quantized + dim); + + // Perform quantization + float min_val, delta; + quantize(temp_input.data(), quantized, min_val, delta); + + // Store min_val and delta in the metadata + metadata[0] = min_val; + metadata[1] = delta; + + // Calculate and store the inverted norm for cosine similarity + metadata[2] = calculateDequantizedNorm(quantized, min_val, delta); + } + } + +private: + const size_t dim; + const size_t processed_bytes_count; +}; From eacd40f1bbe4512fccac582c69fb4944dbe99f4d Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Thu, 29 May 2025 17:10:23 +0300 Subject: [PATCH 14/22] Added tests and changes to the PP --- .../spaces/computer/preprocessor_container.h | 8 +- src/VecSim/spaces/computer/preprocessors.h | 223 +++---- tests/unit/test_components.cpp | 624 +++++++++++++++++- tests/unit/test_hnsw_tiered.cpp | 11 + 4 files changed, 735 insertions(+), 131 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index cd2f0dec8..41728de66 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -171,10 +171,16 @@ MultiPreprocessorsContainer::preprocess(const void *o void *storage_blob = nullptr; void *query_blob = nullptr; + + // Sepreated variables for the storage blob size and query_blob_size, + // in case we need to change their sizes to different values. + size_t storage_blob_size = input_blob_size; + size_t query_blob_size = input_blob_size; + for (auto pp : preprocessors) { if (!pp) break; - pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment); + pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size, this->alignment); } // At least one blob was allocated. diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 8c8112916..725bba098 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "VecSim/memory/vecsim_base.h" #include "VecSim/spaces/spaces.h" @@ -23,8 +24,13 @@ class PreprocessorInterface : public VecsimBaseObject { : VecsimBaseObject(allocator) {} // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results // are the same size. + //TODO: Add query_blob_size as a parameter to the preprocess functions, to allow + // different sizes for storage and query blobs in the future, if needed. virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const = 0; + virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const = 0; virtual void preprocessForStorage(const void *original_blob, void *&storage_blob, size_t &input_blob_size) const = 0; virtual void preprocessQuery(const void *original_blob, void *&query_blob, @@ -44,6 +50,20 @@ class CosinePreprocessor : public PreprocessorInterface { : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc()), dim(dim), processed_bytes_count(processed_bytes_count) {} + + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + // This assert verifies that that the current use of this function is for blobs of the same + // size, which is the case for the Cosine preprocessor. If we ever need to support different + // sizes for storage and query blobs, we can remove the assert and implement the logic to + // handle different sizes. + assert(storage_blob_size == query_blob_size); + + preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); + query_blob_size = storage_blob_size; // Ensure both blobs have the same size after processing. + } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { // This assert verifies that if a blob was allocated by a previous preprocessor, its @@ -145,173 +165,118 @@ class CosinePreprocessor : public PreprocessorInterface { // preprocessQuery: No-op – queries arrive as float32 and remain uncompressed -// preprocess -> -// if storage_blob == null || storage_blob == query_blob: -// allocate storage blob with storage_blob_size -// edge case: -// Check if the input size is not big enough and if not, reallocate it, and update the size. -add this scenario to tests -// quantize it the storage_blob - - -// add class member const size_t storage_bytes_count, and calculate it as follows: -// storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float); - - -template class QuantPreprocessor : public PreprocessorInterface { public: - QuantPreprocessor(std::shared_ptr allocator, size_t dim, - size_t processed_bytes_count) - : PreprocessorInterface(allocator), dim(dim), processed_bytes_count(processed_bytes_count) {} + // Constructor for backward compatibility (single blob size) + QuantPreprocessor(std::shared_ptr allocator, size_t dim) + : PreprocessorInterface(allocator), dim(dim), + storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {} // quantized + min + delta + inverted_norm {} + // Helper function to perform quantization - void quantize(const float* input, uint8_t* output, float& min_val, float& delta) const { + void quantize(const float* input, uint8_t* quantized) const { + assert(dim > 0); // Find min and max values - min_val = input[0]; + float min_val = input[0]; float max_val = input[0]; - for (size_t i = 1; i < dim; i++) { + for (size_t i = 1; i < this->dim; i++) { min_val = std::min(min_val, input[i]); max_val = std::max(max_val, input[i]); } - + // Calculate scaling factor - delta = (max_val - min_val) / 255.0f; - - // Quantize values - if (delta > 0) { - for (size_t i = 0; i < dim; i++) { - output[i] = static_cast((input[i] - min_val) / delta); - } - } else { - // Handle case where all values are the same - memset(output, 0, dim); + const float diff = (max_val - min_val); + const float delta = diff == 0.0f ? 1.0f : diff / 255.0f; + const float inv_delta = 1.0f / delta; + + // Quantize the values + for (size_t i = 0; i < this->dim; i++) { + quantized[i] = static_cast(std::round((input[i] - min_val) * inv_delta)); } + + // Reserve space for metadata at the end of the blob + // [quantized values, min_val, delta, inverted_norm] + float* metadata = reinterpret_cast(quantized + this->dim); + + // Store min_val, delta, and inverted_norm in the metadata + metadata[0] = min_val; + metadata[1] = delta; } - // Calculate the norm of the dequantized vector - float calculateDequantizedNorm(const uint8_t* quantized, const float min_val, const float delta) const { - float norm = 0.0f; - for (size_t i = 0; i < dim; i++) { - float dequantized = min_val + quantized[i] * delta; - norm += dequantized * dequantized; - } - return norm > 0 ? 1.0f / std::sqrt(norm) : 0.0f; + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &input_blob_size, unsigned char alignment) const override { + // For backward compatibility - both blobs have the same size + preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, alignment); } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &input_blob_size, unsigned char alignment) const override { - // This assert verifies that if a blob was allocated by a previous preprocessor, its - // size matches our expected processed size. Therefore, it is safe to skip re-allocation and - // process it inplace. Supporting dynamic resizing would require additional size checks (if - // statements) and memory management logic, which could impact performance. Currently, no - // code path requires this capability. If resizing becomes necessary in the future, remove - // the assertions and implement appropriate allocation handling with performance - // considerations. - assert(storage_blob == nullptr || input_blob_size == processed_bytes_count); - assert(query_blob == nullptr || input_blob_size == processed_bytes_count); - // Case 1: Blobs are different (one might be null, or both are allocated and processed - // separately). - if (storage_blob != query_blob) { - // If one of them is null, allocate memory for it and copy the original_blob to it. - if (storage_blob == nullptr) { - storage_blob = this->allocator->allocate(processed_bytes_count); - memcpy(storage_blob, original_blob, input_blob_size); - } else if (query_blob == nullptr) { - query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(query_blob, original_blob, input_blob_size); - } - } else { // Case 2: Blobs are the same (either both are null or processed in the same way). - if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set - // storage_blob to point to it. - query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(query_blob, original_blob, input_blob_size); - storage_blob = query_blob; + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + + + if (storage_blob) + { + if (storage_blob == query_blob) { + storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); + } else if (storage_blob_size < storage_bytes_count) { + // Check if the blob allocated by a previous preprocessor is big enough, otherwise, realloc it. + // Can happen when the dim is smaller than the quantization metadata. + // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 bytes. + // But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = 10 bytes. + storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count); } + } - input_blob_size = processed_bytes_count; + else { + // storage_blob is nullptr, so we need to allocate new memory + storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); + } + storage_blob_size = this->storage_bytes_count; + + // Cast to appropriate types + const float* input = static_cast(original_blob); + uint8_t* quantized = static_cast(storage_blob); + quantize(input, quantized); + } void preprocessForStorage(const void *original_blob, void *&blob, size_t &input_blob_size) const override { - // see assert docs in preprocess - assert(blob == nullptr || input_blob_size == processed_bytes_count); - + // Allocate quantized blob if needed if (blob == nullptr) { - blob = this->allocator->allocate(processed_bytes_count); - memcpy(blob, original_blob, input_blob_size); - } - - // Only quantize if input is float32 and we're quantizing to uint8_t - if constexpr (std::is_same_v) { - // Cast to appropriate types - const float* input = static_cast(original_blob); - uint8_t* quantized = static_cast(blob); - - // Reserve space for metadata at the end of the blob - // [quantized values, min_val, delta, inverted_norm] - float* metadata = reinterpret_cast(quantized + dim); - - // Perform quantization - float min_val, delta; - quantize(input, quantized, min_val, delta); - - // Store min_val and delta in the metadata - metadata[0] = min_val; - metadata[1] = delta; - - // Calculate and store the inverted norm for cosine similarity - metadata[2] = calculateDequantizedNorm(quantized, min_val, delta); + blob = this->allocator->allocate(storage_bytes_count); } - - input_blob_size = processed_bytes_count; + + // Cast to appropriate types + const float* input = static_cast(original_blob); + uint8_t* quantized = static_cast(blob); + quantize(input, quantized); + + input_blob_size = storage_bytes_count; } - void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size, + void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size, unsigned char alignment) const override { - // see assert docs in preprocess - assert(blob == nullptr || input_blob_size == processed_bytes_count); + // No-op: queries remain as float32 if (blob == nullptr) { - blob = this->allocator->allocate_aligned(processed_bytes_count, alignment); - memcpy(blob, original_blob, input_blob_size); + blob = this->allocator->allocate_aligned(query_blob_size, alignment); + memcpy(blob, original_blob, query_blob_size); } - input_blob_size = processed_bytes_count; } void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { + // No-op: queries remain as float32 assert(blob); - assert(input_blob_size == this->processed_bytes_count); } - void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override { - assert(blob); - assert(input_blob_size == this->processed_bytes_count); - - if constexpr (std::is_same_v) { - // Cast to appropriate types - float* input = static_cast(blob); - uint8_t* quantized = static_cast(blob); - - // Create temporary copy of input data - std::vector temp_input(input, input + dim); - - // Reserve space for metadata at the end of the blob - float* metadata = reinterpret_cast(quantized + dim); - - // Perform quantization - float min_val, delta; - quantize(temp_input.data(), quantized, min_val, delta); - - // Store min_val and delta in the metadata - metadata[0] = min_val; - metadata[1] = delta; - - // Calculate and store the inverted norm for cosine similarity - metadata[2] = calculateDequantizedNorm(quantized, min_val, delta); - } + void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override { + // This function is unused for this preprocessor. + assert(original_blob); + assert(input_blob_size >= storage_bytes_count); } private: const size_t dim; - const size_t processed_bytes_count; + const size_t storage_bytes_count; }; diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index f6eabd3c8..14dd78898 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -10,6 +10,7 @@ #include "gtest/gtest.h" #include "VecSim/vec_sim.h" #include "VecSim/spaces/computer/preprocessor_container.h" +#include "VecSim/spaces/computer/preprocessors.h" #include "VecSim/spaces/computer/calculator.h" #include "unit_test_utils.h" #include "tests_utils.h" @@ -65,7 +66,16 @@ class DummyStoragePreprocessor : public PreprocessorInterface { if (!value_to_add_query) value_to_add_query = value_to_add_storage; } - + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + // This assert verifies that there's no use for this function for now - different sizes for + // storage and query blobs. If such a use case arises, we can remove the assert and implement + // the logic to handle different sizes. + assert(storage_blob_size == query_blob_size); + + preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); + } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { @@ -111,6 +121,17 @@ class DummyQueryPreprocessor : public PreprocessorInterface { value_to_add_query = value_to_add_storage; } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + // This assert verifies that there's no use for this function for now - different sizes for + // storage and query blobs. If such a use case arises, we can remove the assert and implement + // the logic to handle different sizes. + assert(storage_blob_size == query_blob_size); + + preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); + } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment); @@ -148,6 +169,12 @@ class DummyMixedPreprocessor : public PreprocessorInterface { DataType value_to_add_storage, DataType value_to_add_query) : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(value_to_add_query) {} + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); + } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { @@ -208,6 +235,19 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { static constexpr unsigned char getExcessValue() { return excess_value; } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + // if the blobs are equal, + if (storage_blob == query_blob) { + preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment); + query_blob = storage_blob; + query_blob_size = storage_blob_size; + return; + } + } + + // If the input blob size is not enough void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { // if the blobs are equal, @@ -960,3 +1000,585 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) { final_blob_bytes_count)); } } + +// Tests the quantization preprocessor with a single preprocessor in the chain. +// The QuantPreprocessor allocates the storage blob and processes it, while the query blob +// is unprocessed and allocated by the preprocessors container. +TEST(PreprocessorsTest, QuantizationTest) { + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t n_preprocessors = 1; + constexpr size_t alignment = 5; + constexpr size_t elements = 7; + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + for (size_t i = 0; i < elements; i++) { + original_blob[i] = static_cast(i); + } + + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(quant_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); + + constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float); + uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; + quant_preprocessor->quantize(original_blob, expected_processed_blob); + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_blob, + quantized_blob_bytes_count)); + + } + +} + +// Tests the quantization preprocessor with a cosine preprocessor in the chain. +// The QuantPreprocessor receives an allocated blob from the cosine preprocessor. +TEST(PreprocessorsTest, QuantizationTestWithCosine) { + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t n_preprocessors = 2; + constexpr size_t alignment = 5; + constexpr size_t elements = 7; + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + for (size_t i = 0; i < elements; i++) { + original_blob[i] = static_cast(i); + } + + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto cosine_preprocessor = + new (allocator) CosinePreprocessor(allocator, elements, original_blob_size); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(cosine_preprocessor); + multiPPContainer.addPreprocessor(quant_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); + + float expected_processed_blob[elements] = {0}; + memcpy(expected_processed_blob, original_blob, original_blob_size); + VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32); + uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0}; + // quantization should be applied after normalization + quant_preprocessor->quantize(expected_processed_blob, quantized_blob); + // compare the storage blob to the expected processed blob + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + quantized_blob, + elements)); + + } + +} + +// Tests the quantization preprocessor with a single preprocessor in the chain. +// The QuantPreprocessor allocates the storage blob with a size larger than the original blob. + +TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) { + // Checks that if not enough memory was allocated by a previous preprocessor, the quantization + // preprocessor will reallocate it. + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t n_preprocessors = 1; + constexpr size_t alignment = 5; + constexpr size_t elements = 2; + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + for (size_t i = 0; i < elements; i++) { + original_blob[i] = static_cast(i+10); + } + + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(quant_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); + + constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float); + uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; + quant_preprocessor->quantize(original_blob, expected_processed_blob); + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_blob, + quantized_blob_bytes_count)); + } + +} + +// Tests the quantization preprocessor with a cosine preprocessor in the chain. +// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to reallocate it. +// because the original blob size is smaller than the processed_bytes_count of the cosine preprocessor. +TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { + // Checks that if not enough memory was allocated by a previous preprocessor, the quantization + // preprocessor will reallocate it. + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t n_preprocessors = 2; + constexpr size_t alignment = 5; + constexpr size_t elements = 2; + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + for (size_t i = 0; i < elements; i++) { + original_blob[i] = static_cast(i+2.5f); + } + + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto cosine_preprocessor = + new (allocator) CosinePreprocessor(allocator, elements, original_blob_size); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(cosine_preprocessor); + multiPPContainer.addPreprocessor(quant_preprocessor); + + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); + + float expected_processed_blob[elements] = {0}; + memcpy(expected_processed_blob, original_blob, original_blob_size); + VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32); + uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0}; + // quantization should be applied after normalization + quant_preprocessor->quantize(expected_processed_blob, quantized_blob); + // compare the storage blob to the expected processed blob + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + quantized_blob, + elements)); + + } + +} + + +// TEST(PreprocessorsTest, CosineThenQuantizeTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; + +// auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); +// auto cosine_preprocessor = +// new (allocator) CosinePreprocessor(allocator, dim, dim * sizeof(float)); + +// auto multiPPContainer = +// MultiPreprocessorsContainer(allocator, 0); +// multiPPContainer.addPreprocessor(cosine_preprocessor); +// multiPPContainer.addPreprocessor(quant_preprocessor); +// // Test input vector with known values +// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; +// const size_t input_blob_size = dim * sizeof(float); +// // Allocate output buffer +// auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size); +// const void* quantized_blob = processed_blobs.getStorageBlob(); + + +// } + + + +// TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 3; + +// QuantPreprocessor preprocessor(allocator, dim); + +// // Test with uniform values (edge case where max == min) +// const float input[dim] = {5.0f, 5.0f, 5.0f}; + +// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// auto quantized_blob = allocator->allocate_unique(expected_blob_size); +// uint8_t* quantized = static_cast(quantized_blob.get()); + +// preprocessor.quantize(input, quantized); + +// // Extract metadata +// const float* metadata = reinterpret_cast(quantized + dim); +// const float min_val = metadata[0]; +// const float delta = metadata[1]; + +// // When all values are the same, delta should be 1.0f (fallback) +// EXPECT_FLOAT_EQ(min_val, 5.0f); +// EXPECT_FLOAT_EQ(delta, 1.0f); + +// // All quantized values should be 0 +// for (size_t i = 0; i < dim; i++) { +// EXPECT_EQ(quantized[i], 0); +// } +// } + +// TEST(QuantPreprocessorTest, PreprocessForStorageTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {-1.0f, 0.0f, 1.0f, 2.0f}; +// size_t input_blob_size = dim * sizeof(float); + +// void* storage_blob = nullptr; + +// // Test preprocessForStorage +// preprocessor.preprocessForStorage(input, storage_blob, input_blob_size); + +// // Verify blob was allocated and size was updated +// ASSERT_NE(storage_blob, nullptr); +// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// EXPECT_EQ(input_blob_size, expected_storage_size); + +// // Verify quantization was performed correctly +// const uint8_t* quantized = static_cast(storage_blob); +// const float* metadata = reinterpret_cast(quantized + dim); + +// EXPECT_FLOAT_EQ(metadata[0], -1.0f); // min_val +// EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f); // delta = (2.0f - (-1.0f)) / 255.0f + +// // Clean up +// allocator->free_allocation(storage_blob); +// } + +// TEST(QuantPreprocessorTest, PreprocessQueryTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; +// size_t query_blob_size = dim * sizeof(float); +// unsigned char alignment = 32; + +// void* query_blob = nullptr; + +// // Test preprocessQuery (should be no-op, keeping float32) +// preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment); + +// // Verify blob was allocated and remains float32 +// ASSERT_NE(query_blob, nullptr); +// EXPECT_EQ(query_blob_size, dim * sizeof(float)); // Size should remain unchanged + +// // Verify data was copied correctly (no quantization) +// const float* query_data = static_cast(query_blob); +// for (size_t i = 0; i < dim; i++) { +// EXPECT_FLOAT_EQ(query_data[i], input[i]); +// } + +// // Verify alignment +// EXPECT_EQ(reinterpret_cast(query_blob) % alignment, 0); + +// // Clean up +// allocator->free_allocation(query_blob); +// } + +// TEST(QuantPreprocessorTest, PreprocessQueryInPlaceTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; + +// QuantPreprocessor preprocessor(allocator, dim); + +// float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; +// size_t input_blob_size = dim * sizeof(float); +// unsigned char alignment = 16; + +// // Test preprocessQueryInPlace (should be no-op) +// preprocessor.preprocessQueryInPlace(input, input_blob_size, alignment); + +// // Verify data remains unchanged +// EXPECT_FLOAT_EQ(input[0], 1.0f); +// EXPECT_FLOAT_EQ(input[1], 2.0f); +// EXPECT_FLOAT_EQ(input[2], 3.0f); +// EXPECT_FLOAT_EQ(input[3], 4.0f); +// } + +// TEST(QuantPreprocessorTest, BackwardCompatibilityPreprocessTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 3; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {0.0f, 5.0f, 10.0f}; +// size_t input_blob_size = dim * sizeof(float); +// unsigned char alignment = 16; + +// void* storage_blob = nullptr; +// void* query_blob = nullptr; + +// // Test backward compatibility interface (single blob size) +// preprocessor.preprocess(input, storage_blob, query_blob, input_blob_size, alignment); + +// // Verify storage blob was quantized +// ASSERT_NE(storage_blob, nullptr); +// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// EXPECT_EQ(input_blob_size, expected_storage_size); + +// // Verify query blob remains float32 +// ASSERT_NE(query_blob, nullptr); +// const float* query_data = static_cast(query_blob); +// for (size_t i = 0; i < dim; i++) { +// EXPECT_FLOAT_EQ(query_data[i], input[i]); +// } + +// // Clean up +// allocator->free_allocation(storage_blob); +// allocator->free_allocation(query_blob); +// } + +// TEST(QuantPreprocessorTest, DualParameterPreprocessTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 3; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {0.0f, 5.0f, 10.0f}; +// size_t storage_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// size_t query_blob_size = dim * sizeof(float); +// unsigned char alignment = 16; + +// void* storage_blob = nullptr; +// void* query_blob = nullptr; + +// // Test dual parameter interface (different storage and query sizes) +// preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, alignment); + +// // Verify storage blob was quantized with correct size +// ASSERT_NE(storage_blob, nullptr); +// const uint8_t* quantized = static_cast(storage_blob); +// const float* metadata = reinterpret_cast(quantized + dim); + +// EXPECT_FLOAT_EQ(metadata[0], 0.0f); // min_val +// EXPECT_FLOAT_EQ(metadata[1], 10.0f / 255.0f); // delta + +// // Verify query blob remains float32 with correct size +// ASSERT_NE(query_blob, nullptr); +// EXPECT_EQ(query_blob_size, dim * sizeof(float)); +// const float* query_data = static_cast(query_blob); +// for (size_t i = 0; i < dim; i++) { +// EXPECT_FLOAT_EQ(query_data[i], input[i]); +// } + +// // Clean up +// allocator->free_allocation(storage_blob); +// allocator->free_allocation(query_blob); +// } + +// TEST(QuantPreprocessorTest, ExtremeValuesQuantizationTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; + +// QuantPreprocessor preprocessor(allocator, dim); + +// // Test with extreme values +// const float input[dim] = {-1000.0f, -0.001f, 0.001f, 1000.0f}; + +// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// auto quantized_blob = allocator->allocate_unique(expected_blob_size); +// uint8_t* quantized = static_cast(quantized_blob.get()); + +// preprocessor.quantize(input, quantized); + +// // Extract metadata +// const float* metadata = reinterpret_cast(quantized + dim); +// const float min_val = metadata[0]; +// const float delta = metadata[1]; + +// EXPECT_FLOAT_EQ(min_val, -1000.0f); +// EXPECT_FLOAT_EQ(delta, 2000.0f / 255.0f); // delta = (1000.0f - (-1000.0f)) / 255.0f + +// // Verify quantized values +// EXPECT_EQ(quantized[0], 0); // min value maps to 0 +// EXPECT_EQ(quantized[3], 255); // max value maps to 255 +// } + +// TEST(QuantPreprocessorTest, LargeDimensionTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 128; + +// QuantPreprocessor preprocessor(allocator, dim); + +// // Create test vector with linearly increasing values +// std::vector input(dim); +// for (size_t i = 0; i < dim; i++) { +// input[i] = static_cast(i); +// } + +// size_t input_blob_size = dim * sizeof(float); +// void* storage_blob = nullptr; + +// // Test preprocessForStorage with large dimension +// preprocessor.preprocessForStorage(input.data(), storage_blob, input_blob_size); + +// ASSERT_NE(storage_blob, nullptr); +// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// EXPECT_EQ(input_blob_size, expected_storage_size); + +// // Verify quantization +// const uint8_t* quantized = static_cast(storage_blob); +// const float* metadata = reinterpret_cast(quantized + dim); + +// EXPECT_FLOAT_EQ(metadata[0], 0.0f); // min_val +// EXPECT_FLOAT_EQ(metadata[1], (dim - 1.0f) / 255.0f); // delta + +// // Verify first and last quantized values +// EXPECT_EQ(quantized[0], 0); +// EXPECT_EQ(quantized[dim - 1], 255); + +// // Clean up +// allocator->free_allocation(storage_blob); +// } + +// TEST(QuantPreprocessorTest, MultiPreprocessorsContainerIntegrationTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 4; +// constexpr size_t n_preprocessors = 1; +// unsigned char alignment = 32; + +// auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); + +// // Add QuantPreprocessor to the container +// auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); +// ASSERT_EQ(multiPPContainer.addPreprocessor(quant_preprocessor), 0); + +// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; +// size_t original_blob_size = dim * sizeof(float); + +// // Test preprocessing through container +// ProcessedBlobs processed_blobs = multiPPContainer.preprocess(input, original_blob_size); + +// const void* storage_blob = processed_blobs.getStorageBlob(); +// const void* query_blob = processed_blobs.getQueryBlob(); + +// // Verify storage blob was quantized +// ASSERT_NE(storage_blob, nullptr); +// ASSERT_NE(storage_blob, input); // Should be different from original + +// const uint8_t* quantized = static_cast(storage_blob); +// const float* metadata = reinterpret_cast(quantized + dim); + +// EXPECT_FLOAT_EQ(metadata[0], 1.0f); // min_val +// EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f); // delta + +// // Verify query blob remains float32 +// ASSERT_NE(query_blob, nullptr); +// const float* query_data = static_cast(query_blob); +// for (size_t i = 0; i < dim; i++) { +// EXPECT_FLOAT_EQ(query_data[i], input[i]); +// } + +// // Verify query blob alignment +// EXPECT_EQ(reinterpret_cast(query_blob) % alignment, 0); +// } + +// TEST(QuantPreprocessorTest, PreprocessForStorageWithPreAllocatedBlobTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 3; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {2.0f, 4.0f, 6.0f}; +// size_t input_blob_size = dim * sizeof(float); + +// // Pre-allocate storage blob +// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// void* storage_blob = allocator->allocate(expected_storage_size); + +// // Test preprocessForStorage with pre-allocated blob +// preprocessor.preprocessForStorage(input, storage_blob, input_blob_size); + +// // Verify size was updated correctly +// EXPECT_EQ(input_blob_size, expected_storage_size); + +// // Verify quantization was performed +// const uint8_t* quantized = static_cast(storage_blob); +// const float* metadata = reinterpret_cast(quantized + dim); + +// EXPECT_FLOAT_EQ(metadata[0], 2.0f); // min_val +// EXPECT_FLOAT_EQ(metadata[1], 4.0f / 255.0f); // delta = (6.0f - 2.0f) / 255.0f + +// // Verify quantized values +// EXPECT_EQ(quantized[0], 0); // (2.0f - 2.0f) / delta = 0 +// EXPECT_EQ(quantized[1], 127); // (4.0f - 2.0f) / delta ≈ 127 +// EXPECT_EQ(quantized[2], 255); // (6.0f - 2.0f) / delta = 255 + +// // Clean up +// allocator->free_allocation(storage_blob); +// } + +// TEST(QuantPreprocessorTest, PreprocessQueryWithPreAllocatedBlobTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 3; + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {1.5f, 2.5f, 3.5f}; +// size_t query_blob_size = dim * sizeof(float); +// unsigned char alignment = 16; + +// // Pre-allocate query blob +// void* query_blob = allocator->allocate_aligned(query_blob_size, alignment); + +// // Test preprocessQuery with pre-allocated blob +// preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment); + +// // Verify size remains unchanged +// EXPECT_EQ(query_blob_size, dim * sizeof(float)); + +// // Verify data was copied correctly (no quantization) +// const float* query_data = static_cast(query_blob); +// for (size_t i = 0; i < dim; i++) { +// EXPECT_FLOAT_EQ(query_data[i], input[i]); +// } + +// // Clean up +// allocator->free_allocation(query_blob); +// } + +// TEST(QuantPreprocessorTest, ZeroDimensionEdgeCaseTest) { +// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); +// constexpr size_t dim = 1; // Minimum valid dimension + +// QuantPreprocessor preprocessor(allocator, dim); + +// const float input[dim] = {42.0f}; + +// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); +// auto quantized_blob = allocator->allocate_unique(expected_blob_size); +// uint8_t* quantized = static_cast(quantized_blob.get()); + +// preprocessor.quantize(input, quantized); + +// // Extract metadata +// const float* metadata = reinterpret_cast(quantized + dim); +// const float min_val = metadata[0]; +// const float delta = metadata[1]; + +// // With single value, min == max, so delta should be 1.0f +// EXPECT_FLOAT_EQ(min_val, 42.0f); +// EXPECT_FLOAT_EQ(delta, 1.0f); +// EXPECT_EQ(quantized[0], 0); // Single value quantizes to 0 +// } + diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index 0b7157c0c..f446e46bc 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -4204,6 +4204,17 @@ class PreprocessorDoubleValue : public PreprocessorInterface { public: PreprocessorDoubleValue(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim) {} + + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { + // This assert makes sure the current use of the preprocessor is valid, + // i.e., both blobs are of the same size. + // In order to use different sizes, the preprocessor should be modified. + assert(storage_blob_size == query_blob_size); + preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); + } + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { From adec86bb911d329dbf298147634eb39072ea8eba Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Thu, 29 May 2025 18:08:00 +0300 Subject: [PATCH 15/22] frmat --- .../spaces/computer/preprocessor_container.h | 5 +- src/VecSim/spaces/computer/preprocessors.h | 64 ++++++++------- tests/unit/test_components.cpp | 77 ++++++++----------- tests/unit/test_hnsw_tiered.cpp | 6 +- 4 files changed, 70 insertions(+), 82 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index 41728de66..915c3b83a 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -172,7 +172,7 @@ MultiPreprocessorsContainer::preprocess(const void *o void *storage_blob = nullptr; void *query_blob = nullptr; - // Sepreated variables for the storage blob size and query_blob_size, + // Sepreated variables for the storage blob size and query_blob_size, // in case we need to change their sizes to different values. size_t storage_blob_size = input_blob_size; size_t query_blob_size = input_blob_size; @@ -180,7 +180,8 @@ MultiPreprocessorsContainer::preprocess(const void *o for (auto pp : preprocessors) { if (!pp) break; - pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size, this->alignment); + pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size, + this->alignment); } // At least one blob was allocated. diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 725bba098..f3055899f 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -24,7 +24,7 @@ class PreprocessorInterface : public VecsimBaseObject { : VecsimBaseObject(allocator) {} // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results // are the same size. - //TODO: Add query_blob_size as a parameter to the preprocess functions, to allow + // TODO: Add query_blob_size as a parameter to the preprocess functions, to allow // different sizes for storage and query blobs in the future, if needed. virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const = 0; @@ -50,7 +50,6 @@ class CosinePreprocessor : public PreprocessorInterface { : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc()), dim(dim), processed_bytes_count(processed_bytes_count) {} - void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &storage_blob_size, size_t &query_blob_size, unsigned char alignment) const override { @@ -61,7 +60,8 @@ class CosinePreprocessor : public PreprocessorInterface { assert(storage_blob_size == query_blob_size); preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); - query_blob_size = storage_blob_size; // Ensure both blobs have the same size after processing. + query_blob_size = + storage_blob_size; // Ensure both blobs have the same size after processing. } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, @@ -149,13 +149,11 @@ class CosinePreprocessor : public PreprocessorInterface { const size_t processed_bytes_count; }; - -// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision of uint8_t. -// The quantization is done by finding the minimum and maximum values of the input vector, and then -// scaling the values to fit in the range of [0, 255]. The quantized values are then stored in a uint8_t array. -// [Quantized values, min, delta] -// Quantized Blob size = dim_elements * sizeof(int8) + 2 * sizeof(float) -// delta = (max_val - min_val) / 255.0f +// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision +// of uint8_t. The quantization is done by finding the minimum and maximum values of the input +// vector, and then scaling the values to fit in the range of [0, 255]. The quantized values are +// then stored in a uint8_t array. [Quantized values, min, delta] Quantized Blob size = +// dim_elements * sizeof(int8) + 2 * sizeof(float) delta = (max_val - min_val) / 255.0f // quantized_v[i] = (v[i] - min_val) / delta // preprocessForStorage: // if null: @@ -164,17 +162,16 @@ class CosinePreprocessor : public PreprocessorInterface { // 3. Compute (min, delta) and quantize to the quantized blob or in place. // preprocessQuery: No-op – queries arrive as float32 and remain uncompressed - class QuantPreprocessor : public PreprocessorInterface { public: // Constructor for backward compatibility (single blob size) QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), - storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {} // quantized + min + delta + inverted_norm {} - + storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) { + } // quantized + min + delta + inverted_norm {} // Helper function to perform quantization - void quantize(const float* input, uint8_t* quantized) const { + void quantize(const float *input, uint8_t *quantized) const { assert(dim > 0); // Find min and max values float min_val = input[0]; @@ -196,7 +193,7 @@ class QuantPreprocessor : public PreprocessorInterface { // Reserve space for metadata at the end of the blob // [quantized values, min_val, delta, inverted_norm] - float* metadata = reinterpret_cast(quantized + this->dim); + float *metadata = reinterpret_cast(quantized + this->dim); // Store min_val, delta, and inverted_norm in the metadata metadata[0] = min_val; @@ -204,40 +201,39 @@ class QuantPreprocessor : public PreprocessorInterface { } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &input_blob_size, unsigned char alignment) const override { + size_t &input_blob_size, unsigned char alignment) const override { // For backward compatibility - both blobs have the same size - preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, alignment); + preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, + alignment); } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &storage_blob_size, size_t &query_blob_size, - unsigned char alignment) const override { - + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { - if (storage_blob) - { + if (storage_blob) { if (storage_blob == query_blob) { - storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); + storage_blob = + this->allocator->allocate_aligned(this->storage_bytes_count, alignment); } else if (storage_blob_size < storage_bytes_count) { - // Check if the blob allocated by a previous preprocessor is big enough, otherwise, realloc it. - // Can happen when the dim is smaller than the quantization metadata. - // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 bytes. - // But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = 10 bytes. + // Check if the blob allocated by a previous preprocessor is big enough, otherwise, + // realloc it. Can happen when the dim is smaller than the quantization metadata. + // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 + // bytes. But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = + // 10 bytes. storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count); } - } - else { + } else { // storage_blob is nullptr, so we need to allocate new memory storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); } storage_blob_size = this->storage_bytes_count; // Cast to appropriate types - const float* input = static_cast(original_blob); - uint8_t* quantized = static_cast(storage_blob); + const float *input = static_cast(original_blob); + uint8_t *quantized = static_cast(storage_blob); quantize(input, quantized); - } void preprocessForStorage(const void *original_blob, void *&blob, @@ -248,8 +244,8 @@ class QuantPreprocessor : public PreprocessorInterface { } // Cast to appropriate types - const float* input = static_cast(original_blob); - uint8_t* quantized = static_cast(blob); + const float *input = static_cast(original_blob); + uint8_t *quantized = static_cast(blob); quantize(input, quantized); input_blob_size = storage_bytes_count; diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 14dd78898..25c83b5d3 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -70,8 +70,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface { size_t &storage_blob_size, size_t &query_blob_size, unsigned char alignment) const override { // This assert verifies that there's no use for this function for now - different sizes for - // storage and query blobs. If such a use case arises, we can remove the assert and implement - // the logic to handle different sizes. + // storage and query blobs. If such a use case arises, we can remove the assert and + // implement the logic to handle different sizes. assert(storage_blob_size == query_blob_size); preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); @@ -122,11 +122,11 @@ class DummyQueryPreprocessor : public PreprocessorInterface { } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &storage_blob_size, size_t &query_blob_size, - unsigned char alignment) const override { + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { // This assert verifies that there's no use for this function for now - different sizes for - // storage and query blobs. If such a use case arises, we can remove the assert and implement - // the logic to handle different sizes. + // storage and query blobs. If such a use case arises, we can remove the assert and + // implement the logic to handle different sizes. assert(storage_blob_size == query_blob_size); preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); @@ -170,8 +170,8 @@ class DummyMixedPreprocessor : public PreprocessorInterface { : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage), value_to_add_query(value_to_add_query) {} void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &storage_blob_size, size_t &query_blob_size, - unsigned char alignment) const override { + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); } @@ -236,8 +236,8 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { static constexpr unsigned char getExcessValue() { return excess_value; } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &storage_blob_size, size_t &query_blob_size, - unsigned char alignment) const override { + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { // if the blobs are equal, if (storage_blob == query_blob) { preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment); @@ -247,7 +247,7 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { } } - // If the input blob size is not enough + // If the input blob size is not enough void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { // if the blobs are equal, @@ -1015,7 +1015,7 @@ TEST(PreprocessorsTest, QuantizationTest) { for (size_t i = 0; i < elements; i++) { original_blob[i] = static_cast(i); } - + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); @@ -1030,15 +1030,14 @@ TEST(PreprocessorsTest, QuantizationTest) { ASSERT_NE(storage_blob, nullptr); ASSERT_NE(storage_blob, query_blob); - constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float); + constexpr size_t quantized_blob_bytes_count = + elements * sizeof(uint8_t) + 2 * sizeof(float); uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; quant_preprocessor->quantize(original_blob, expected_processed_blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), - expected_processed_blob, - quantized_blob_bytes_count)); - + expected_processed_blob, + quantized_blob_bytes_count)); } - } // Tests the quantization preprocessor with a cosine preprocessor in the chain. @@ -1054,7 +1053,7 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) { for (size_t i = 0; i < elements; i++) { original_blob[i] = static_cast(i); } - + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, elements, original_blob_size); @@ -1081,11 +1080,8 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) { quant_preprocessor->quantize(expected_processed_blob, quantized_blob); // compare the storage blob to the expected processed blob EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), - quantized_blob, - elements)); - + quantized_blob, elements)); } - } // Tests the quantization preprocessor with a single preprocessor in the chain. @@ -1102,9 +1098,9 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) { auto original_blob_alloc = allocator->allocate_unique(original_blob_size); float *original_blob = static_cast(original_blob_alloc.get()); for (size_t i = 0; i < elements; i++) { - original_blob[i] = static_cast(i+10); + original_blob[i] = static_cast(i + 10); } - + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); @@ -1118,19 +1114,20 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) { ASSERT_NE(storage_blob, nullptr); ASSERT_NE(storage_blob, query_blob); - constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float); + constexpr size_t quantized_blob_bytes_count = + elements * sizeof(uint8_t) + 2 * sizeof(float); uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; quant_preprocessor->quantize(original_blob, expected_processed_blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), - expected_processed_blob, - quantized_blob_bytes_count)); + expected_processed_blob, + quantized_blob_bytes_count)); } - } // Tests the quantization preprocessor with a cosine preprocessor in the chain. -// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to reallocate it. -// because the original blob size is smaller than the processed_bytes_count of the cosine preprocessor. +// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to +// reallocate it. because the original blob size is smaller than the processed_bytes_count of the +// cosine preprocessor. TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { // Checks that if not enough memory was allocated by a previous preprocessor, the quantization // preprocessor will reallocate it. @@ -1142,9 +1139,9 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { auto original_blob_alloc = allocator->allocate_unique(original_blob_size); float *original_blob = static_cast(original_blob_alloc.get()); for (size_t i = 0; i < elements; i++) { - original_blob[i] = static_cast(i+2.5f); + original_blob[i] = static_cast(i + 2.5f); } - + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto cosine_preprocessor = new (allocator) CosinePreprocessor(allocator, elements, original_blob_size); @@ -1171,14 +1168,10 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { quant_preprocessor->quantize(expected_processed_blob, quantized_blob); // compare the storage blob to the expected processed blob EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), - quantized_blob, - elements)); - + quantized_blob, elements)); } - } - // TEST(PreprocessorsTest, CosineThenQuantizeTest) { // std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); // constexpr size_t dim = 4; @@ -1198,10 +1191,7 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { // auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size); // const void* quantized_blob = processed_blobs.getStorageBlob(); - // } - - // TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) { // std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); @@ -1363,7 +1353,8 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { // void* query_blob = nullptr; // // Test dual parameter interface (different storage and query sizes) -// preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, alignment); +// preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, +// alignment); // // Verify storage blob was quantized with correct size // ASSERT_NE(storage_blob, nullptr); @@ -1457,7 +1448,8 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { // constexpr size_t n_preprocessors = 1; // unsigned char alignment = 32; -// auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); +// auto multiPPContainer = MultiPreprocessorsContainer(allocator, +// alignment); // // Add QuantPreprocessor to the container // auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); @@ -1581,4 +1573,3 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { // EXPECT_FLOAT_EQ(delta, 1.0f); // EXPECT_EQ(quantized[0], 0); // Single value quantizes to 0 // } - diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index f446e46bc..28a0bc1b2 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -4204,10 +4204,10 @@ class PreprocessorDoubleValue : public PreprocessorInterface { public: PreprocessorDoubleValue(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim) {} - + void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, - size_t &storage_blob_size, size_t &query_blob_size, - unsigned char alignment) const override { + size_t &storage_blob_size, size_t &query_blob_size, + unsigned char alignment) const override { // This assert makes sure the current use of the preprocessor is valid, // i.e., both blobs are of the same size. // In order to use different sizes, the preprocessor should be modified. From 31a0c7df660107288f6cceb98688e67b162a98b8 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Tue, 3 Jun 2025 16:54:19 +0300 Subject: [PATCH 16/22] Fix and add tests --- src/VecSim/spaces/computer/preprocessors.h | 120 +++-- tests/unit/test_components.cpp | 526 +++++---------------- 2 files changed, 205 insertions(+), 441 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index f3055899f..26811f946 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -60,8 +60,8 @@ class CosinePreprocessor : public PreprocessorInterface { assert(storage_blob_size == query_blob_size); preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment); - query_blob_size = - storage_blob_size; // Ensure both blobs have the same size after processing. + // Ensure both blobs have the same size after processing. + query_blob_size = storage_blob_size; } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, @@ -168,18 +168,14 @@ class QuantPreprocessor : public PreprocessorInterface { QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) { - } // quantized + min + delta + inverted_norm {} + } // quantized + min + delta{} - // Helper function to perform quantization + // Helper function to perform quantization. This function is used by both preprocess and + // supports in-place quantization of the storage blob. void quantize(const float *input, uint8_t *quantized) const { - assert(dim > 0); + assert(input && quantized); // Find min and max values - float min_val = input[0]; - float max_val = input[0]; - for (size_t i = 1; i < this->dim; i++) { - min_val = std::min(min_val, input[i]); - max_val = std::max(max_val, input[i]); - } + auto [min_val, max_val] = find_min_max(input); // Calculate scaling factor const float diff = (max_val - min_val); @@ -191,18 +187,16 @@ class QuantPreprocessor : public PreprocessorInterface { quantized[i] = static_cast(std::round((input[i] - min_val) * inv_delta)); } - // Reserve space for metadata at the end of the blob - // [quantized values, min_val, delta, inverted_norm] float *metadata = reinterpret_cast(quantized + this->dim); - // Store min_val, delta, and inverted_norm in the metadata + // Store min_val, delta, in the metadata metadata[0] = min_val; metadata[1] = delta; } void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const override { - // For backward compatibility - both blobs have the same size + // For backward compatibility - delegate to the two-size version with identical sizes preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, alignment); } @@ -210,30 +204,56 @@ class QuantPreprocessor : public PreprocessorInterface { void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &storage_blob_size, size_t &query_blob_size, unsigned char alignment) const override { - - if (storage_blob) { + // CASE 1: STORAGE BLOB NEEDS ALLOCATION + if (!storage_blob) { + // Allocate aligned memory for the quantized storage blob + storage_blob = static_cast( + this->allocator->allocate_aligned(this->storage_bytes_count, alignment)); + + // Quantize directly from original data + const float *input = static_cast(original_blob); + quantize(input, static_cast(storage_blob)); + } + // CASE 2: STORAGE BLOB EXISTS + else { + // CASE 2A: STORAGE AND QUERY SHARE MEMORY if (storage_blob == query_blob) { - storage_blob = + // Need to allocate a separate storage blob since query remains float32 + // while storage needs to be quantized + void *new_storage = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); - } else if (storage_blob_size < storage_bytes_count) { - // Check if the blob allocated by a previous preprocessor is big enough, otherwise, - // realloc it. Can happen when the dim is smaller than the quantization metadata. - // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 - // bytes. But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = - // 10 bytes. - storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count); - } - } else { - // storage_blob is nullptr, so we need to allocate new memory - storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment); + // Quantize from the shared blob (query_blob) to the new storage blob + quantize(static_cast(query_blob), + static_cast(new_storage)); + + // Update storage_blob to point to the new memory + storage_blob = new_storage; + } + // CASE 2B: SEPARATE STORAGE AND QUERY BLOBS + else { + // Check if storage blob needs resizing + if (storage_blob_size < this->storage_bytes_count) { + // Allocate new storage with correct size + uint8_t *new_storage = static_cast( + this->allocator->allocate_aligned(this->storage_bytes_count, alignment)); + + // Quantize from old storage to new storage + quantize(static_cast(storage_blob), + static_cast(new_storage)); + + // Free old storage and update pointer + this->allocator->free_allocation(storage_blob); + storage_blob = new_storage; + } else { + // Storage blob is large enough, quantize in-place + quantize(static_cast(storage_blob), + static_cast(storage_blob)); + } + } } - storage_blob_size = this->storage_bytes_count; - // Cast to appropriate types - const float *input = static_cast(original_blob); - uint8_t *quantized = static_cast(storage_blob); - quantize(input, quantized); + storage_blob_size = this->storage_bytes_count; } void preprocessForStorage(const void *original_blob, void *&blob, @@ -267,12 +287,42 @@ class QuantPreprocessor : public PreprocessorInterface { } void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override { - // This function is unused for this preprocessor. assert(original_blob); assert(input_blob_size >= storage_bytes_count); + + // Only quantize in-place if input buffer is large enough + if (input_blob_size >= storage_bytes_count) { + quantize(static_cast(original_blob), + static_cast(original_blob)); + } else { + // Fallback: this shouldn't happen if caller allocated correctly + assert(false && "Input buffer too small for in-place quantization"); + } } private: + std::pair find_min_max(const float *input) const { + float min_val = input[0]; + float max_val = input[0]; + + size_t i = 1; + // Process 4 elements at a time for better performance + for (; i + 3 < dim; i += 4) { + const float v0 = input[i]; + const float v1 = input[i + 1]; + const float v2 = input[i + 2]; + const float v3 = input[i + 3]; + min_val = std::min({min_val, v0, v1, v2, v3}); + max_val = std::max({max_val, v0, v1, v2, v3}); + } + // Handle remaining elements + for (; i < dim; i++) { + min_val = std::min(min_val, input[i]); + max_val = std::max(max_val, input[i]); + } + return {min_val, max_val}; + } + const size_t dim; const size_t storage_bytes_count; }; diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 25c83b5d3..96a925d39 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -243,7 +243,6 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface { preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment); query_blob = storage_blob; query_blob_size = storage_blob_size; - return; } } @@ -1012,9 +1011,7 @@ TEST(PreprocessorsTest, QuantizationTest) { constexpr size_t original_blob_size = elements * sizeof(float); auto original_blob_alloc = allocator->allocate_unique(original_blob_size); float *original_blob = static_cast(original_blob_alloc.get()); - for (size_t i = 0; i < elements; i++) { - original_blob[i] = static_cast(i); - } + test_utils::populate_float_vec(original_blob, elements); auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto multiPPContainer = @@ -1032,11 +1029,33 @@ TEST(PreprocessorsTest, QuantizationTest) { constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float); - uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; + auto expected_processed_blob_alloc = allocator->allocate_unique(quantized_blob_bytes_count); + uint8_t *expected_processed_blob = + static_cast(expected_processed_blob_alloc.get()); + quant_preprocessor->quantize(original_blob, expected_processed_blob); EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), expected_processed_blob, quantized_blob_bytes_count)); + + // Compare the min and delta values of the quantized blob and expected_processed_blob + const float *storage_blob_metadata = + reinterpret_cast(static_cast(storage_blob) + elements); + const float *qexpected_processed_metadata = + reinterpret_cast(expected_processed_blob + elements); + // Check that the min and delta values are close enough + ASSERT_NEAR(storage_blob_metadata[0], qexpected_processed_metadata[0], 1e-6); + ASSERT_NEAR(storage_blob_metadata[1], qexpected_processed_metadata[1], 1e-6); + + float min = storage_blob_metadata[0]; + float delta = storage_blob_metadata[1]; + // reconstruct the original blob from the quantized blob + uint8_t *uint8_storage_blob = static_cast(const_cast(storage_blob)); + for (size_t i = 0; i < elements; i++) { + float reconstructed_value = min + uint8_storage_blob[i] * delta; + ASSERT_NEAR(reconstructed_value, original_blob[i], 0.01) + << "Reconstructed blob differs from the original blob at index " << i; + } } } @@ -1050,9 +1069,7 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) { constexpr size_t original_blob_size = elements * sizeof(float); auto original_blob_alloc = allocator->allocate_unique(original_blob_size); float *original_blob = static_cast(original_blob_alloc.get()); - for (size_t i = 0; i < elements; i++) { - original_blob[i] = static_cast(i); - } + test_utils::populate_float_vec(original_blob, elements); auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); auto cosine_preprocessor = @@ -1072,15 +1089,27 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) { ASSERT_NE(query_blob, nullptr); ASSERT_NE(storage_blob, query_blob); - float expected_processed_blob[elements] = {0}; + auto expected_processed_blob_alloc = allocator->allocate_unique(original_blob_size); + float *expected_processed_blob = static_cast(expected_processed_blob_alloc.get()); memcpy(expected_processed_blob, original_blob, original_blob_size); VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32); - uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0}; + + auto quantized_blob_alloc = + allocator->allocate_unique(elements * sizeof(uint8_t) + 2 * sizeof(float)); + uint8_t *quantized_blob = static_cast(quantized_blob_alloc.get()); + // quantization should be applied after normalization quant_preprocessor->quantize(expected_processed_blob, quantized_blob); // compare the storage blob to the expected processed blob EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), quantized_blob, elements)); + const float *storage_blob_metadata = + reinterpret_cast(static_cast(storage_blob) + elements); + const float *qexpected_processed_metadata = + reinterpret_cast(quantized_blob + elements); + // Check that the min and delta values are close enough + ASSERT_FLOAT_EQ(storage_blob_metadata[0], qexpected_processed_metadata[0]); + ASSERT_FLOAT_EQ(storage_blob_metadata[1], qexpected_processed_metadata[1]); } } @@ -1091,19 +1120,20 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) { // Checks that if not enough memory was allocated by a previous preprocessor, the quantization // preprocessor will reallocate it. std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - constexpr size_t n_preprocessors = 1; + constexpr size_t n_preprocessors = 2; constexpr size_t alignment = 5; constexpr size_t elements = 2; constexpr size_t original_blob_size = elements * sizeof(float); auto original_blob_alloc = allocator->allocate_unique(original_blob_size); float *original_blob = static_cast(original_blob_alloc.get()); - for (size_t i = 0; i < elements; i++) { - original_blob[i] = static_cast(i + 10); - } + test_utils::populate_float_vec(original_blob, elements); auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto dummy_preprocessor = + new (allocator) dummyPreprocessors::DummyStoragePreprocessor(allocator, 0.0f); auto multiPPContainer = MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(dummy_preprocessor); multiPPContainer.addPreprocessor(quant_preprocessor); { ProcessedBlobs processed_blobs = @@ -1172,404 +1202,88 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { } } -// TEST(PreprocessorsTest, CosineThenQuantizeTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; - -// auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); -// auto cosine_preprocessor = -// new (allocator) CosinePreprocessor(allocator, dim, dim * sizeof(float)); - -// auto multiPPContainer = -// MultiPreprocessorsContainer(allocator, 0); -// multiPPContainer.addPreprocessor(cosine_preprocessor); -// multiPPContainer.addPreprocessor(quant_preprocessor); -// // Test input vector with known values -// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; -// const size_t input_blob_size = dim * sizeof(float); -// // Allocate output buffer -// auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size); -// const void* quantized_blob = processed_blobs.getStorageBlob(); - -// } - -// TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 3; - -// QuantPreprocessor preprocessor(allocator, dim); - -// // Test with uniform values (edge case where max == min) -// const float input[dim] = {5.0f, 5.0f, 5.0f}; - -// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// auto quantized_blob = allocator->allocate_unique(expected_blob_size); -// uint8_t* quantized = static_cast(quantized_blob.get()); - -// preprocessor.quantize(input, quantized); - -// // Extract metadata -// const float* metadata = reinterpret_cast(quantized + dim); -// const float min_val = metadata[0]; -// const float delta = metadata[1]; - -// // When all values are the same, delta should be 1.0f (fallback) -// EXPECT_FLOAT_EQ(min_val, 5.0f); -// EXPECT_FLOAT_EQ(delta, 1.0f); - -// // All quantized values should be 0 -// for (size_t i = 0; i < dim; i++) { -// EXPECT_EQ(quantized[i], 0); -// } -// } - -// TEST(QuantPreprocessorTest, PreprocessForStorageTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {-1.0f, 0.0f, 1.0f, 2.0f}; -// size_t input_blob_size = dim * sizeof(float); - -// void* storage_blob = nullptr; - -// // Test preprocessForStorage -// preprocessor.preprocessForStorage(input, storage_blob, input_blob_size); - -// // Verify blob was allocated and size was updated -// ASSERT_NE(storage_blob, nullptr); -// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// EXPECT_EQ(input_blob_size, expected_storage_size); - -// // Verify quantization was performed correctly -// const uint8_t* quantized = static_cast(storage_blob); -// const float* metadata = reinterpret_cast(quantized + dim); - -// EXPECT_FLOAT_EQ(metadata[0], -1.0f); // min_val -// EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f); // delta = (2.0f - (-1.0f)) / 255.0f - -// // Clean up -// allocator->free_allocation(storage_blob); -// } - -// TEST(QuantPreprocessorTest, PreprocessQueryTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; -// size_t query_blob_size = dim * sizeof(float); -// unsigned char alignment = 32; - -// void* query_blob = nullptr; - -// // Test preprocessQuery (should be no-op, keeping float32) -// preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment); - -// // Verify blob was allocated and remains float32 -// ASSERT_NE(query_blob, nullptr); -// EXPECT_EQ(query_blob_size, dim * sizeof(float)); // Size should remain unchanged - -// // Verify data was copied correctly (no quantization) -// const float* query_data = static_cast(query_blob); -// for (size_t i = 0; i < dim; i++) { -// EXPECT_FLOAT_EQ(query_data[i], input[i]); -// } - -// // Verify alignment -// EXPECT_EQ(reinterpret_cast(query_blob) % alignment, 0); - -// // Clean up -// allocator->free_allocation(query_blob); -// } - -// TEST(QuantPreprocessorTest, PreprocessQueryInPlaceTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; - -// QuantPreprocessor preprocessor(allocator, dim); - -// float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; -// size_t input_blob_size = dim * sizeof(float); -// unsigned char alignment = 16; - -// // Test preprocessQueryInPlace (should be no-op) -// preprocessor.preprocessQueryInPlace(input, input_blob_size, alignment); - -// // Verify data remains unchanged -// EXPECT_FLOAT_EQ(input[0], 1.0f); -// EXPECT_FLOAT_EQ(input[1], 2.0f); -// EXPECT_FLOAT_EQ(input[2], 3.0f); -// EXPECT_FLOAT_EQ(input[3], 4.0f); -// } - -// TEST(QuantPreprocessorTest, BackwardCompatibilityPreprocessTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 3; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {0.0f, 5.0f, 10.0f}; -// size_t input_blob_size = dim * sizeof(float); -// unsigned char alignment = 16; - -// void* storage_blob = nullptr; -// void* query_blob = nullptr; - -// // Test backward compatibility interface (single blob size) -// preprocessor.preprocess(input, storage_blob, query_blob, input_blob_size, alignment); - -// // Verify storage blob was quantized -// ASSERT_NE(storage_blob, nullptr); -// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// EXPECT_EQ(input_blob_size, expected_storage_size); - -// // Verify query blob remains float32 -// ASSERT_NE(query_blob, nullptr); -// const float* query_data = static_cast(query_blob); -// for (size_t i = 0; i < dim; i++) { -// EXPECT_FLOAT_EQ(query_data[i], input[i]); -// } - -// // Clean up -// allocator->free_allocation(storage_blob); -// allocator->free_allocation(query_blob); -// } - -// TEST(QuantPreprocessorTest, DualParameterPreprocessTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 3; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {0.0f, 5.0f, 10.0f}; -// size_t storage_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// size_t query_blob_size = dim * sizeof(float); -// unsigned char alignment = 16; - -// void* storage_blob = nullptr; -// void* query_blob = nullptr; - -// // Test dual parameter interface (different storage and query sizes) -// preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, -// alignment); - -// // Verify storage blob was quantized with correct size -// ASSERT_NE(storage_blob, nullptr); -// const uint8_t* quantized = static_cast(storage_blob); -// const float* metadata = reinterpret_cast(quantized + dim); - -// EXPECT_FLOAT_EQ(metadata[0], 0.0f); // min_val -// EXPECT_FLOAT_EQ(metadata[1], 10.0f / 255.0f); // delta - -// // Verify query blob remains float32 with correct size -// ASSERT_NE(query_blob, nullptr); -// EXPECT_EQ(query_blob_size, dim * sizeof(float)); -// const float* query_data = static_cast(query_blob); -// for (size_t i = 0; i < dim; i++) { -// EXPECT_FLOAT_EQ(query_data[i], input[i]); -// } - -// // Clean up -// allocator->free_allocation(storage_blob); -// allocator->free_allocation(query_blob); -// } - -// TEST(QuantPreprocessorTest, ExtremeValuesQuantizationTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; - -// QuantPreprocessor preprocessor(allocator, dim); - -// // Test with extreme values -// const float input[dim] = {-1000.0f, -0.001f, 0.001f, 1000.0f}; - -// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// auto quantized_blob = allocator->allocate_unique(expected_blob_size); -// uint8_t* quantized = static_cast(quantized_blob.get()); - -// preprocessor.quantize(input, quantized); - -// // Extract metadata -// const float* metadata = reinterpret_cast(quantized + dim); -// const float min_val = metadata[0]; -// const float delta = metadata[1]; - -// EXPECT_FLOAT_EQ(min_val, -1000.0f); -// EXPECT_FLOAT_EQ(delta, 2000.0f / 255.0f); // delta = (1000.0f - (-1000.0f)) / 255.0f - -// // Verify quantized values -// EXPECT_EQ(quantized[0], 0); // min value maps to 0 -// EXPECT_EQ(quantized[3], 255); // max value maps to 255 -// } - -// TEST(QuantPreprocessorTest, LargeDimensionTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 128; - -// QuantPreprocessor preprocessor(allocator, dim); - -// // Create test vector with linearly increasing values -// std::vector input(dim); -// for (size_t i = 0; i < dim; i++) { -// input[i] = static_cast(i); -// } - -// size_t input_blob_size = dim * sizeof(float); -// void* storage_blob = nullptr; - -// // Test preprocessForStorage with large dimension -// preprocessor.preprocessForStorage(input.data(), storage_blob, input_blob_size); - -// ASSERT_NE(storage_blob, nullptr); -// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// EXPECT_EQ(input_blob_size, expected_storage_size); - -// // Verify quantization -// const uint8_t* quantized = static_cast(storage_blob); -// const float* metadata = reinterpret_cast(quantized + dim); - -// EXPECT_FLOAT_EQ(metadata[0], 0.0f); // min_val -// EXPECT_FLOAT_EQ(metadata[1], (dim - 1.0f) / 255.0f); // delta - -// // Verify first and last quantized values -// EXPECT_EQ(quantized[0], 0); -// EXPECT_EQ(quantized[dim - 1], 255); - -// // Clean up -// allocator->free_allocation(storage_blob); -// } - -// TEST(QuantPreprocessorTest, MultiPreprocessorsContainerIntegrationTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 4; -// constexpr size_t n_preprocessors = 1; -// unsigned char alignment = 32; - -// auto multiPPContainer = MultiPreprocessorsContainer(allocator, -// alignment); - -// // Add QuantPreprocessor to the container -// auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); -// ASSERT_EQ(multiPPContainer.addPreprocessor(quant_preprocessor), 0); - -// const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f}; -// size_t original_blob_size = dim * sizeof(float); - -// // Test preprocessing through container -// ProcessedBlobs processed_blobs = multiPPContainer.preprocess(input, original_blob_size); - -// const void* storage_blob = processed_blobs.getStorageBlob(); -// const void* query_blob = processed_blobs.getQueryBlob(); - -// // Verify storage blob was quantized -// ASSERT_NE(storage_blob, nullptr); -// ASSERT_NE(storage_blob, input); // Should be different from original - -// const uint8_t* quantized = static_cast(storage_blob); -// const float* metadata = reinterpret_cast(quantized + dim); - -// EXPECT_FLOAT_EQ(metadata[0], 1.0f); // min_val -// EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f); // delta - -// // Verify query blob remains float32 -// ASSERT_NE(query_blob, nullptr); -// const float* query_data = static_cast(query_blob); -// for (size_t i = 0; i < dim; i++) { -// EXPECT_FLOAT_EQ(query_data[i], input[i]); -// } - -// // Verify query blob alignment -// EXPECT_EQ(reinterpret_cast(query_blob) % alignment, 0); -// } - -// TEST(QuantPreprocessorTest, PreprocessForStorageWithPreAllocatedBlobTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 3; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {2.0f, 4.0f, 6.0f}; -// size_t input_blob_size = dim * sizeof(float); - -// // Pre-allocate storage blob -// const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// void* storage_blob = allocator->allocate(expected_storage_size); - -// // Test preprocessForStorage with pre-allocated blob -// preprocessor.preprocessForStorage(input, storage_blob, input_blob_size); - -// // Verify size was updated correctly -// EXPECT_EQ(input_blob_size, expected_storage_size); - -// // Verify quantization was performed -// const uint8_t* quantized = static_cast(storage_blob); -// const float* metadata = reinterpret_cast(quantized + dim); - -// EXPECT_FLOAT_EQ(metadata[0], 2.0f); // min_val -// EXPECT_FLOAT_EQ(metadata[1], 4.0f / 255.0f); // delta = (6.0f - 2.0f) / 255.0f - -// // Verify quantized values -// EXPECT_EQ(quantized[0], 0); // (2.0f - 2.0f) / delta = 0 -// EXPECT_EQ(quantized[1], 127); // (4.0f - 2.0f) / delta ≈ 127 -// EXPECT_EQ(quantized[2], 255); // (6.0f - 2.0f) / delta = 255 - -// // Clean up -// allocator->free_allocation(storage_blob); -// } - -// TEST(QuantPreprocessorTest, PreprocessQueryWithPreAllocatedBlobTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 3; - -// QuantPreprocessor preprocessor(allocator, dim); - -// const float input[dim] = {1.5f, 2.5f, 3.5f}; -// size_t query_blob_size = dim * sizeof(float); -// unsigned char alignment = 16; - -// // Pre-allocate query blob -// void* query_blob = allocator->allocate_aligned(query_blob_size, alignment); - -// // Test preprocessQuery with pre-allocated blob -// preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment); - -// // Verify size remains unchanged -// EXPECT_EQ(query_blob_size, dim * sizeof(float)); +// Test with all identical values (delta = 0 case) +TEST(PreprocessorsTest, QuantizationIdenticalValuesTest) { + // Test with all same values + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t n_preprocessors = 1; + constexpr size_t alignment = 5; + constexpr size_t elements = 5; + constexpr size_t original_blob_size = elements * sizeof(float); + auto original_blob_alloc = allocator->allocate_unique(original_blob_size); + float *original_blob = static_cast(original_blob_alloc.get()); + for (size_t i = 0; i < elements; i++) { + original_blob[i] = 1.0f; // All values are the same + } + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(quant_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_blob, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); -// // Verify data was copied correctly (no quantization) -// const float* query_data = static_cast(query_blob); -// for (size_t i = 0; i < dim; i++) { -// EXPECT_FLOAT_EQ(query_data[i], input[i]); -// } + constexpr size_t quantized_blob_bytes_count = + elements * sizeof(uint8_t) + 2 * sizeof(float); + uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; + quant_preprocessor->quantize(original_blob, expected_processed_blob); + EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), + expected_processed_blob, + quantized_blob_bytes_count)); + } +} -// // Clean up -// allocator->free_allocation(query_blob); -// } +TEST(PreprocessorsTest, QuantizationInPlaceTest) { + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + constexpr size_t alignment = 5; + constexpr size_t dim = 5; + constexpr size_t n_preprocessors = 2; + constexpr size_t original_blob_size = dim * sizeof(float); + constexpr size_t storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float); -// TEST(QuantPreprocessorTest, ZeroDimensionEdgeCaseTest) { -// std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); -// constexpr size_t dim = 1; // Minimum valid dimension + // Create a float array with known values + float original_data[dim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; -// QuantPreprocessor preprocessor(allocator, dim); + auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim); + auto dummy_preprocessor = + new (allocator) dummyPreprocessors::DummyStoragePreprocessor(allocator, 0.0f); + auto multiPPContainer = + MultiPreprocessorsContainer(allocator, alignment); + multiPPContainer.addPreprocessor(dummy_preprocessor); + multiPPContainer.addPreprocessor(quant_preprocessor); + { + ProcessedBlobs processed_blobs = + multiPPContainer.preprocess(original_data, original_blob_size); + const void *storage_blob = processed_blobs.getStorageBlob(); + const void *query_blob = processed_blobs.getQueryBlob(); + // blobs should point to the same memory slot + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); -// const float input[dim] = {42.0f}; + // Verify the quantization results + const uint8_t *quantized = static_cast(storage_blob); + const float *metadata = reinterpret_cast(quantized + dim); -// const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float); -// auto quantized_blob = allocator->allocate_unique(expected_blob_size); -// uint8_t* quantized = static_cast(quantized_blob.get()); + // Extract metadata + float min_val = metadata[0]; + float delta = metadata[1]; -// preprocessor.quantize(input, quantized); + // Check min_val is correct (should be 1.0f) + ASSERT_FLOAT_EQ(min_val, 1.0f); -// // Extract metadata -// const float* metadata = reinterpret_cast(quantized + dim); -// const float min_val = metadata[0]; -// const float delta = metadata[1]; + // Check delta is correct ((5.0f - 1.0f) / 255.0f) + ASSERT_FLOAT_EQ(delta, 4.0f / 255.0f); -// // With single value, min == max, so delta should be 1.0f -// EXPECT_FLOAT_EQ(min_val, 42.0f); -// EXPECT_FLOAT_EQ(delta, 1.0f); -// EXPECT_EQ(quantized[0], 0); // Single value quantizes to 0 -// } + // Check quantized values + for (size_t i = 0; i < dim; i++) { + uint8_t expected = + static_cast(std::round((original_data[i] - min_val) / delta)); + ASSERT_EQ(quantized[i], expected); + } + } +} \ No newline at end of file From 59fb16df35dbb588f8f0c12e9b851ff40546b572 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Wed, 4 Jun 2025 10:16:16 +0300 Subject: [PATCH 17/22] added tests for coverege --- .../spaces/computer/preprocessor_container.h | 4 +- src/VecSim/spaces/computer/preprocessors.h | 14 +- tests/unit/test_components.cpp | 123 ++++++++++++------ 3 files changed, 91 insertions(+), 50 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h index 915c3b83a..e9ed08f8a 100644 --- a/src/VecSim/spaces/computer/preprocessor_container.h +++ b/src/VecSim/spaces/computer/preprocessor_container.h @@ -172,8 +172,8 @@ MultiPreprocessorsContainer::preprocess(const void *o void *storage_blob = nullptr; void *query_blob = nullptr; - // Sepreated variables for the storage blob size and query_blob_size, - // in case we need to change their sizes to different values. + // Use of separate variables for the storage_blob_size and query_blob_size, in case we need to + // change their sizes to different values. size_t storage_blob_size = input_blob_size; size_t query_blob_size = input_blob_size; diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 26811f946..cf0f265f3 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -168,10 +168,10 @@ class QuantPreprocessor : public PreprocessorInterface { QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) { - } // quantized + min + delta{} + } // quantized + min + delta // Helper function to perform quantization. This function is used by both preprocess and - // supports in-place quantization of the storage blob. + // preprocessQuery and supports in-place quantization of the storage blob. void quantize(const float *input, uint8_t *quantized) const { assert(input && quantized); // Find min and max values @@ -274,10 +274,6 @@ class QuantPreprocessor : public PreprocessorInterface { void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size, unsigned char alignment) const override { // No-op: queries remain as float32 - if (blob == nullptr) { - blob = this->allocator->allocate_aligned(query_blob_size, alignment); - memcpy(blob, original_blob, query_blob_size); - } } void preprocessQueryInPlace(void *blob, size_t input_blob_size, @@ -288,15 +284,13 @@ class QuantPreprocessor : public PreprocessorInterface { void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override { assert(original_blob); - assert(input_blob_size >= storage_bytes_count); + assert(input_blob_size >= storage_bytes_count && + "Input buffer too small for in-place quantization"); // Only quantize in-place if input buffer is large enough if (input_blob_size >= storage_bytes_count) { quantize(static_cast(original_blob), static_cast(original_blob)); - } else { - // Fallback: this shouldn't happen if caller allocated correctly - assert(false && "Input buffer too small for in-place quantization"); } } diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 96a925d39..6c350104d 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -1044,8 +1044,8 @@ TEST(PreprocessorsTest, QuantizationTest) { const float *qexpected_processed_metadata = reinterpret_cast(expected_processed_blob + elements); // Check that the min and delta values are close enough - ASSERT_NEAR(storage_blob_metadata[0], qexpected_processed_metadata[0], 1e-6); - ASSERT_NEAR(storage_blob_metadata[1], qexpected_processed_metadata[1], 1e-6); + ASSERT_FLOAT_EQ(storage_blob_metadata[0], qexpected_processed_metadata[0]); + ASSERT_FLOAT_EQ(storage_blob_metadata[1], qexpected_processed_metadata[1]); float min = storage_blob_metadata[0]; float delta = storage_blob_metadata[1]; @@ -1202,41 +1202,6 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { } } -// Test with all identical values (delta = 0 case) -TEST(PreprocessorsTest, QuantizationIdenticalValuesTest) { - // Test with all same values - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - constexpr size_t n_preprocessors = 1; - constexpr size_t alignment = 5; - constexpr size_t elements = 5; - constexpr size_t original_blob_size = elements * sizeof(float); - auto original_blob_alloc = allocator->allocate_unique(original_blob_size); - float *original_blob = static_cast(original_blob_alloc.get()); - for (size_t i = 0; i < elements; i++) { - original_blob[i] = 1.0f; // All values are the same - } - auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements); - auto multiPPContainer = - MultiPreprocessorsContainer(allocator, alignment); - multiPPContainer.addPreprocessor(quant_preprocessor); - { - ProcessedBlobs processed_blobs = - multiPPContainer.preprocess(original_blob, original_blob_size); - const void *storage_blob = processed_blobs.getStorageBlob(); - const void *query_blob = processed_blobs.getQueryBlob(); - // blobs should point to the same memory slot - ASSERT_NE(storage_blob, nullptr); - ASSERT_NE(storage_blob, query_blob); - - constexpr size_t quantized_blob_bytes_count = - elements * sizeof(uint8_t) + 2 * sizeof(float); - uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0}; - quant_preprocessor->quantize(original_blob, expected_processed_blob); - EXPECT_NO_FATAL_FAILURE(CompareVectors(static_cast(storage_blob), - expected_processed_blob, - quantized_blob_bytes_count)); - } -} TEST(PreprocessorsTest, QuantizationInPlaceTest) { std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); @@ -1286,4 +1251,86 @@ TEST(PreprocessorsTest, QuantizationInPlaceTest) { ASSERT_EQ(quantized[i], expected); } } -} \ No newline at end of file +} + +// Test the backward compatibility of the preprocess method with a single input_blob_size parameter +TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) { + using namespace dummyPreprocessors; + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t dim = 4; + unsigned char alignment = 5; + float initial_value = 1.0f; + const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value}; + size_t original_blob_size = sizeof(original_blob); + + // Create a preprocessor that modifies both storage and query blobs + auto mixed_preprocessor = new (allocator) + DummyMixedPreprocessor(allocator, 7.0f, 2.0f); + + // Test the backward compatibility method (single input_blob_size) + void *storage_blob = nullptr; + void *query_blob = nullptr; + size_t input_blob_size = original_blob_size; + + // Call the backward compatibility version of preprocess + mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size, alignment); + + // Verify that both blobs were allocated and processed correctly + ASSERT_NE(storage_blob, nullptr); + ASSERT_NE(query_blob, nullptr); + ASSERT_NE(storage_blob, query_blob); + + // Verify that the input_blob_size was updated correctly + ASSERT_EQ(input_blob_size, original_blob_size); + + // Verify that the storage blob was processed correctly + ASSERT_EQ(((const float *)storage_blob)[0], initial_value + 7.0f); + + // Verify that the query blob was processed correctly + ASSERT_EQ(((const float *)query_blob)[0], initial_value + 2.0f); + + // Verify that the query blob is aligned + unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; + ASSERT_EQ(address_alignment, 0); +} + +// Test the QuantPreprocessor's in-place quantization error handling when buffer is too small +TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) { + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + + constexpr size_t dim = 4; + constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float); + constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small + + // Create a QuantPreprocessor + auto quant_preprocessor = QuantPreprocessor(allocator, dim); + + // Allocate a buffer that's too small for in-place quantization + auto buffer_alloc = allocator->allocate_unique(too_small_size); + float *buffer = static_cast(buffer_alloc.get()); + + // Initialize buffer with some values + for (size_t i = 0; i < too_small_size / sizeof(float); i++) { + buffer[i] = static_cast(i); + } + + // In debug builds, this should trigger an assertion failure + EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size), + "Input buffer too small for in-place quantization"); + + // Test with correct size buffer to ensure normal operation works + constexpr size_t correct_size = required_storage_bytes; + auto correct_buffer_alloc = allocator->allocate_unique(correct_size); + float *correct_buffer = static_cast(correct_buffer_alloc.get()); + + // Initialize buffer with some values + for (size_t i = 0; i < dim; i++) { + correct_buffer[i] = static_cast(i); + } + + // This should not assert + EXPECT_NO_FATAL_FAILURE( + quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size)); + +} From 866d8cbaf3522610661e921e8bf345eecec74a9e Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Wed, 4 Jun 2025 10:27:03 +0300 Subject: [PATCH 18/22] format --- tests/unit/test_components.cpp | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 6c350104d..643b02e57 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -1202,7 +1202,6 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) { } } - TEST(PreprocessorsTest, QuantizationInPlaceTest) { std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); constexpr size_t alignment = 5; @@ -1265,8 +1264,7 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) { size_t original_blob_size = sizeof(original_blob); // Create a preprocessor that modifies both storage and query blobs - auto mixed_preprocessor = new (allocator) - DummyMixedPreprocessor(allocator, 7.0f, 2.0f); + auto mixed_preprocessor = new (allocator) DummyMixedPreprocessor(allocator, 7.0f, 2.0f); // Test the backward compatibility method (single input_blob_size) void *storage_blob = nullptr; @@ -1274,22 +1272,23 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) { size_t input_blob_size = original_blob_size; // Call the backward compatibility version of preprocess - mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size, alignment); + mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size, + alignment); // Verify that both blobs were allocated and processed correctly ASSERT_NE(storage_blob, nullptr); ASSERT_NE(query_blob, nullptr); ASSERT_NE(storage_blob, query_blob); - + // Verify that the input_blob_size was updated correctly ASSERT_EQ(input_blob_size, original_blob_size); - + // Verify that the storage blob was processed correctly ASSERT_EQ(((const float *)storage_blob)[0], initial_value + 7.0f); - + // Verify that the query blob was processed correctly ASSERT_EQ(((const float *)query_blob)[0], initial_value + 2.0f); - + // Verify that the query blob is aligned unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; ASSERT_EQ(address_alignment, 0); @@ -1298,39 +1297,38 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) { // Test the QuantPreprocessor's in-place quantization error handling when buffer is too small TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) { std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - + constexpr size_t dim = 4; constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float); constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small - + // Create a QuantPreprocessor auto quant_preprocessor = QuantPreprocessor(allocator, dim); - + // Allocate a buffer that's too small for in-place quantization auto buffer_alloc = allocator->allocate_unique(too_small_size); float *buffer = static_cast(buffer_alloc.get()); - + // Initialize buffer with some values for (size_t i = 0; i < too_small_size / sizeof(float); i++) { buffer[i] = static_cast(i); } - + // In debug builds, this should trigger an assertion failure EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size), "Input buffer too small for in-place quantization"); - + // Test with correct size buffer to ensure normal operation works constexpr size_t correct_size = required_storage_bytes; auto correct_buffer_alloc = allocator->allocate_unique(correct_size); float *correct_buffer = static_cast(correct_buffer_alloc.get()); - + // Initialize buffer with some values for (size_t i = 0; i < dim; i++) { correct_buffer[i] = static_cast(i); } - + // This should not assert EXPECT_NO_FATAL_FAILURE( quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size)); - } From ec4a3a7d3d42e586ab8bbed0df1d887d78592b13 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Wed, 4 Jun 2025 10:48:10 +0300 Subject: [PATCH 19/22] Remove the tests --- tests/unit/test_components.cpp | 39 ---------------------------------- 1 file changed, 39 deletions(-) diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 643b02e57..5afe2873d 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -1293,42 +1293,3 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) { unsigned char address_alignment = (uintptr_t)(query_blob) % alignment; ASSERT_EQ(address_alignment, 0); } - -// Test the QuantPreprocessor's in-place quantization error handling when buffer is too small -TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) { - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - - constexpr size_t dim = 4; - constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float); - constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small - - // Create a QuantPreprocessor - auto quant_preprocessor = QuantPreprocessor(allocator, dim); - - // Allocate a buffer that's too small for in-place quantization - auto buffer_alloc = allocator->allocate_unique(too_small_size); - float *buffer = static_cast(buffer_alloc.get()); - - // Initialize buffer with some values - for (size_t i = 0; i < too_small_size / sizeof(float); i++) { - buffer[i] = static_cast(i); - } - - // In debug builds, this should trigger an assertion failure - EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size), - "Input buffer too small for in-place quantization"); - - // Test with correct size buffer to ensure normal operation works - constexpr size_t correct_size = required_storage_bytes; - auto correct_buffer_alloc = allocator->allocate_unique(correct_size); - float *correct_buffer = static_cast(correct_buffer_alloc.get()); - - // Initialize buffer with some values - for (size_t i = 0; i < dim; i++) { - correct_buffer[i] = static_cast(i); - } - - // This should not assert - EXPECT_NO_FATAL_FAILURE( - quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size)); -} From 985c2c8e3a52c5c1a89284cb5920cb98343a8b96 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Wed, 4 Jun 2025 16:26:45 +0300 Subject: [PATCH 20/22] Fix test --- src/VecSim/spaces/computer/preprocessors.h | 33 ++++++++-------------- tests/unit/test_components.cpp | 9 +++--- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index cf0f265f3..6c5366661 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -24,8 +24,7 @@ class PreprocessorInterface : public VecsimBaseObject { : VecsimBaseObject(allocator) {} // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results // are the same size. - // TODO: Add query_blob_size as a parameter to the preprocess functions, to allow - // different sizes for storage and query blobs in the future, if needed. + // Use the the overload below for different sizes. virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, size_t &input_blob_size, unsigned char alignment) const = 0; virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob, @@ -149,19 +148,15 @@ class CosinePreprocessor : public PreprocessorInterface { const size_t processed_bytes_count; }; -// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision -// of uint8_t. The quantization is done by finding the minimum and maximum values of the input -// vector, and then scaling the values to fit in the range of [0, 255]. The quantized values are -// then stored in a uint8_t array. [Quantized values, min, delta] Quantized Blob size = -// dim_elements * sizeof(int8) + 2 * sizeof(float) delta = (max_val - min_val) / 255.0f -// quantized_v[i] = (v[i] - min_val) / delta -// preprocessForStorage: -// if null: -// - We are not reallocing because it will be released after the query. -// Allocate quantized blob size -// 3. Compute (min, delta) and quantize to the quantized blob or in place. -// preprocessQuery: No-op – queries arrive as float32 and remain uncompressed - +/* + * QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision + * representation using uint8_t. It stores the quantized values along with metadata (min value and + * scaling factor) in a single contiguous blob. The quantized values are then stored in a uint8_t + * array. The quantization is done by finding the minimum and maximum values of the input vector, + * and then scaling the values to fit in the range of [0, 255]. The quantized values are then + * stored in a uint8_t array. The quantized blob size is: + * dim_elements * sizeof(int8) + 2 * sizeof(float) +*/ class QuantPreprocessor : public PreprocessorInterface { public: // Constructor for backward compatibility (single blob size) @@ -259,7 +254,7 @@ class QuantPreprocessor : public PreprocessorInterface { void preprocessForStorage(const void *original_blob, void *&blob, size_t &input_blob_size) const override { // Allocate quantized blob if needed - if (blob == nullptr) { + if (!blob) { blob = this->allocator->allocate(storage_bytes_count); } @@ -279,7 +274,6 @@ class QuantPreprocessor : public PreprocessorInterface { void preprocessQueryInPlace(void *blob, size_t input_blob_size, unsigned char alignment) const override { // No-op: queries remain as float32 - assert(blob); } void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override { @@ -287,11 +281,8 @@ class QuantPreprocessor : public PreprocessorInterface { assert(input_blob_size >= storage_bytes_count && "Input buffer too small for in-place quantization"); - // Only quantize in-place if input buffer is large enough - if (input_blob_size >= storage_bytes_count) { - quantize(static_cast(original_blob), + quantize(static_cast(original_blob), static_cast(original_blob)); - } } private: diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp index 5afe2873d..917167e41 100644 --- a/tests/unit/test_components.cpp +++ b/tests/unit/test_components.cpp @@ -1243,11 +1243,10 @@ TEST(PreprocessorsTest, QuantizationInPlaceTest) { // Check delta is correct ((5.0f - 1.0f) / 255.0f) ASSERT_FLOAT_EQ(delta, 4.0f / 255.0f); - // Check quantized values - for (size_t i = 0; i < dim; i++) { - uint8_t expected = - static_cast(std::round((original_data[i] - min_val) / delta)); - ASSERT_EQ(quantized[i], expected); + // dequantize and verify the values + for (size_t i = 0; i < dim; ++i) { + float dequantized_value = min_val + quantized[i] * delta; + ASSERT_NEAR(dequantized_value, original_data[i], 0.01); } } } From b7aeb2d81c3d37ede7cf1ef9b4dd3aa45c093365 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Thu, 5 Jun 2025 09:54:50 +0300 Subject: [PATCH 21/22] change to input output type --- src/VecSim/spaces/computer/preprocessors.h | 52 +++++++++++----------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 6c5366661..3b58a72fc 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -149,25 +149,27 @@ class CosinePreprocessor : public PreprocessorInterface { }; /* - * QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision - * representation using uint8_t. It stores the quantized values along with metadata (min value and - * scaling factor) in a single contiguous blob. The quantized values are then stored in a uint8_t - * array. The quantization is done by finding the minimum and maximum values of the input vector, - * and then scaling the values to fit in the range of [0, 255]. The quantized values are then - * stored in a uint8_t array. The quantized blob size is: - * dim_elements * sizeof(int8) + 2 * sizeof(float) + * QuantPreprocessor is a preprocessor that quantizes the input vector of INPUT_TYPE (float) to a + * lower precision representation using OUTPUT_TYPE (uint8_t). It stores the quantized values along + * with metadata (min value and scaling factor) in a single contiguous blob. The quantized values + * are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and + * maximum values of the input vector, and then scaling the values to fit in the range of [0, 255]. + * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(float) */ class QuantPreprocessor : public PreprocessorInterface { + using INPUT_TYPE = float; + using OUTPUT_TYPE = uint8_t; + public: // Constructor for backward compatibility (single blob size) QuantPreprocessor(std::shared_ptr allocator, size_t dim) : PreprocessorInterface(allocator), dim(dim), - storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) { + storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)) { } // quantized + min + delta // Helper function to perform quantization. This function is used by both preprocess and // preprocessQuery and supports in-place quantization of the storage blob. - void quantize(const float *input, uint8_t *quantized) const { + void quantize(const INPUT_TYPE *input, OUTPUT_TYPE *quantized) const { assert(input && quantized); // Find min and max values auto [min_val, max_val] = find_min_max(input); @@ -179,7 +181,7 @@ class QuantPreprocessor : public PreprocessorInterface { // Quantize the values for (size_t i = 0; i < this->dim; i++) { - quantized[i] = static_cast(std::round((input[i] - min_val) * inv_delta)); + quantized[i] = static_cast(std::round((input[i] - min_val) * inv_delta)); } float *metadata = reinterpret_cast(quantized + this->dim); @@ -202,12 +204,12 @@ class QuantPreprocessor : public PreprocessorInterface { // CASE 1: STORAGE BLOB NEEDS ALLOCATION if (!storage_blob) { // Allocate aligned memory for the quantized storage blob - storage_blob = static_cast( + storage_blob = static_cast( this->allocator->allocate_aligned(this->storage_bytes_count, alignment)); // Quantize directly from original data - const float *input = static_cast(original_blob); - quantize(input, static_cast(storage_blob)); + const INPUT_TYPE *input = static_cast(original_blob); + quantize(input, static_cast(storage_blob)); } // CASE 2: STORAGE BLOB EXISTS else { @@ -219,8 +221,8 @@ class QuantPreprocessor : public PreprocessorInterface { this->allocator->allocate_aligned(this->storage_bytes_count, alignment); // Quantize from the shared blob (query_blob) to the new storage blob - quantize(static_cast(query_blob), - static_cast(new_storage)); + quantize(static_cast(query_blob), + static_cast(new_storage)); // Update storage_blob to point to the new memory storage_blob = new_storage; @@ -230,20 +232,20 @@ class QuantPreprocessor : public PreprocessorInterface { // Check if storage blob needs resizing if (storage_blob_size < this->storage_bytes_count) { // Allocate new storage with correct size - uint8_t *new_storage = static_cast( + OUTPUT_TYPE *new_storage = static_cast( this->allocator->allocate_aligned(this->storage_bytes_count, alignment)); // Quantize from old storage to new storage - quantize(static_cast(storage_blob), - static_cast(new_storage)); + quantize(static_cast(storage_blob), + static_cast(new_storage)); // Free old storage and update pointer this->allocator->free_allocation(storage_blob); storage_blob = new_storage; } else { // Storage blob is large enough, quantize in-place - quantize(static_cast(storage_blob), - static_cast(storage_blob)); + quantize(static_cast(storage_blob), + static_cast(storage_blob)); } } } @@ -259,8 +261,8 @@ class QuantPreprocessor : public PreprocessorInterface { } // Cast to appropriate types - const float *input = static_cast(original_blob); - uint8_t *quantized = static_cast(blob); + const INPUT_TYPE *input = static_cast(original_blob); + OUTPUT_TYPE *quantized = static_cast(blob); quantize(input, quantized); input_blob_size = storage_bytes_count; @@ -281,12 +283,12 @@ class QuantPreprocessor : public PreprocessorInterface { assert(input_blob_size >= storage_bytes_count && "Input buffer too small for in-place quantization"); - quantize(static_cast(original_blob), - static_cast(original_blob)); + quantize(static_cast(original_blob), + static_cast(original_blob)); } private: - std::pair find_min_max(const float *input) const { + std::pair find_min_max(const INPUT_TYPE *input) const { float min_val = input[0]; float max_val = input[0]; From a8aee990c2aa9c230d5ba9174a87791414686bf1 Mon Sep 17 00:00:00 2001 From: Dor Forer Date: Thu, 5 Jun 2025 10:14:31 +0300 Subject: [PATCH 22/22] format --- src/VecSim/spaces/computer/preprocessors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h index 3b58a72fc..f981b15a8 100644 --- a/src/VecSim/spaces/computer/preprocessors.h +++ b/src/VecSim/spaces/computer/preprocessors.h @@ -155,7 +155,7 @@ class CosinePreprocessor : public PreprocessorInterface { * are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and * maximum values of the input vector, and then scaling the values to fit in the range of [0, 255]. * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE) + 2 * sizeof(float) -*/ + */ class QuantPreprocessor : public PreprocessorInterface { using INPUT_TYPE = float; using OUTPUT_TYPE = uint8_t;