From 7a6c10aef36247323ff3301a708d9b81a1abc5dc Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 8 May 2025 04:22:44 +0000
Subject: [PATCH 01/22] make CreatePreprocessorsContainerParams templated and
 move it to header file

add logic to determine the cosine processed_bytes_count

add processed_bytes_count to the cosine component

change blob_size param to the original blob size instead of processed_bytes_count in all preprocessing functions.
---
 src/VecSim/CMakeLists.txt                     |  1 -
 .../components/components_factory.cpp         | 23 --------
 .../components/components_factory.h           | 48 ++++++++++++++++-
 .../components/preprocessors_factory.h        |  5 +-
 .../computer/preprocessor_container.cpp       | 34 +++++++-----
 .../spaces/computer/preprocessor_container.h  | 52 +++++++++----------
 src/VecSim/spaces/computer/preprocessors.h    | 47 +++++++++--------
 tests/unit/test_components.cpp                |  9 +++-
 8 files changed, 126 insertions(+), 93 deletions(-)
 delete mode 100644 src/VecSim/index_factories/components/components_factory.cpp

diff --git a/src/VecSim/CMakeLists.txt b/src/VecSim/CMakeLists.txt
index 557851018..ab64066c9 100644
--- a/src/VecSim/CMakeLists.txt
+++ b/src/VecSim/CMakeLists.txt
@@ -31,7 +31,6 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE}
     index_factories/tiered_factory.cpp
     index_factories/svs_factory.cpp
     index_factories/index_factory.cpp
-    index_factories/components/components_factory.cpp
     algorithms/hnsw/visited_nodes_handler.cpp
     vec_sim.cpp
     vec_sim_debug.cpp
diff --git a/src/VecSim/index_factories/components/components_factory.cpp b/src/VecSim/index_factories/components/components_factory.cpp
deleted file mode 100644
index caf485005..000000000
--- a/src/VecSim/index_factories/components/components_factory.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2006-Present, Redis Ltd.
- * All rights reserved.
- *
- * Licensed under your choice of the Redis Source Available License 2.0
- * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
- * GNU Affero General Public License v3 (AGPLv3).
- */
-#include "VecSim/index_factories/components/components_factory.h"
-
-PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric metric, size_t dim,
-                                                                bool is_normalized,
-                                                                unsigned char alignment) {
-    // If the index metric is Cosine, and is_normalized == true, we will skip normalizing vectors
-    // and query blobs.
-    VecSimMetric pp_metric;
-    if (is_normalized && metric == VecSimMetric_Cosine) {
-        pp_metric = VecSimMetric_IP;
-    } else {
-        pp_metric = metric;
-    }
-    return {.metric = pp_metric, .dim = dim, .alignment = alignment};
-}
diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h
index 1135d85cc..23d4d93c7 100644
--- a/src/VecSim/index_factories/components/components_factory.h
+++ b/src/VecSim/index_factories/components/components_factory.h
@@ -14,9 +14,53 @@
 #include "VecSim/index_factories/components/preprocessors_factory.h"
 #include "VecSim/spaces/computer/calculator.h"
 
+/**
+ * @brief Creates parameters for a preprocessors container based on the given metric, dimension,
+ *        normalization flag, and alignment.
+ *
+ * @tparam DataType The data type of the vector elements (e.g., float, int).
+ * @param metric The similarity metric to be used (e.g., Cosine, Inner Product).
+ * @param dim The dimensionality of the vectors.
+ * @param is_normalized A flag indicating whether the vectors are already normalized.
+ * @param alignment The alignment requirement for the data.
+ * @return A PreprocessorsContainerParams object containing the processed parameters:
+ *         - metric: The adjusted metric based on the input and normalization flag.
+ *         - dim: The dimensionality of the vectors.
+ *         - alignment: The alignment requirement for the data.
+ *         - processed_bytes_count: The size of the processed data blob in bytes.
+ *
+ * @details
+ * If the metric is Cosine and the data type is integral, the processed bytes count may include
+ * additional space for normalization (currently commented out). If the vectors are already
+ * normalized (is_normalized == true), the metric is adjusted to Inner Product (IP) to skip
+ * redundant normalization during preprocessing.
+ */
+template <typename DataType>
 PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric metric, size_t dim,
                                                                 bool is_normalized,
-                                                                unsigned char alignment);
+                                                                unsigned char alignment) {
+    // By default the processed blob size is the same as the original blob size.
+    size_t processed_bytes_count = dim * sizeof(DataType);
+
+    // If the index metric is Cosine, and
+    VecSimMetric pp_metric = metric;
+    if (metric == VecSimMetric_Cosine) {
+        // if metric is cosine and DataType is integral, the processed_bytes_count includes the
+        // norm appended to the vector.
+        if (std::is_integral<DataType>::value) {
+            processed_bytes_count += sizeof(float);
+        }
+        // if is_normalized == true, we will enforce skipping normalizing vector and query blobs by
+        // setting the metric to IP.
+        if (is_normalized) {
+            pp_metric = VecSimMetric_IP;
+        }
+    }
+    return {.metric = pp_metric,
+            .dim = dim,
+            .alignment = alignment,
+            .processed_bytes_count = processed_bytes_count};
+}
 
 template <typename DataType, typename DistType>
 IndexComponents<DataType, DistType>
@@ -29,7 +73,7 @@ CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric m
     auto indexCalculator = new (allocator) DistanceCalculatorCommon<DistType>(allocator, distFunc);
 
     PreprocessorsContainerParams ppParams =
-        CreatePreprocessorsContainerParams(metric, dim, is_normalized, alignment);
+        CreatePreprocessorsContainerParams<DataType>(metric, dim, is_normalized, alignment);
     auto preprocessors = CreatePreprocessorsContainer<DataType>(allocator, ppParams);
 
     return {indexCalculator, preprocessors};
diff --git a/src/VecSim/index_factories/components/preprocessors_factory.h b/src/VecSim/index_factories/components/preprocessors_factory.h
index ba8d2a286..c11e935d8 100644
--- a/src/VecSim/index_factories/components/preprocessors_factory.h
+++ b/src/VecSim/index_factories/components/preprocessors_factory.h
@@ -15,6 +15,7 @@ struct PreprocessorsContainerParams {
     VecSimMetric metric;
     size_t dim;
     unsigned char alignment;
+    size_t processed_bytes_count;
 };
 
 template <typename DataType>
@@ -25,8 +26,8 @@ CreatePreprocessorsContainer(std::shared_ptr<VecSimAllocator> allocator,
     if (params.metric == VecSimMetric_Cosine) {
         auto multiPPContainer =
             new (allocator) MultiPreprocessorsContainer<DataType, 1>(allocator, params.alignment);
-        auto cosine_preprocessor =
-            new (allocator) CosinePreprocessor<DataType>(allocator, params.dim);
+        auto cosine_preprocessor = new (allocator)
+            CosinePreprocessor<DataType>(allocator, params.dim, params.processed_bytes_count);
         int next_valid_pp_index = multiPPContainer->addPreprocessor(cosine_preprocessor);
         UNUSED(next_valid_pp_index);
         assert(next_valid_pp_index != -1 && "Cosine preprocessor was not added correctly");
diff --git a/src/VecSim/spaces/computer/preprocessor_container.cpp b/src/VecSim/spaces/computer/preprocessor_container.cpp
index 8b61e1107..9328a7c85 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.cpp
+++ b/src/VecSim/spaces/computer/preprocessor_container.cpp
@@ -8,38 +8,46 @@
  */
 #include "VecSim/spaces/computer/preprocessor_container.h"
 
+/**
+ *
+=========================== TODO =================
+original_blob_size should be a reference so it can be changed when changes in the pp chain.
+
+
+*/
 ProcessedBlobs PreprocessorsContainerAbstract::preprocess(const void *original_blob,
-                                                          size_t processed_bytes_count) const {
-    return ProcessedBlobs(preprocessForStorage(original_blob, processed_bytes_count),
-                          preprocessQuery(original_blob, processed_bytes_count));
+                                                          size_t original_blob_size) const {
+    return ProcessedBlobs(preprocessForStorage(original_blob, original_blob_size),
+                          preprocessQuery(original_blob, original_blob_size));
 }
 
 MemoryUtils::unique_blob
 PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob,
-                                                     size_t processed_bytes_count) const {
+                                                     size_t original_blob_size) const {
     return wrapWithDummyDeleter(const_cast<void *>(original_blob));
 }
 
-MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(
-    const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
-    return maybeCopyToAlignedMem(original_blob, processed_bytes_count, force_copy);
+MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob,
+                                                                         size_t original_blob_size,
+                                                                         bool force_copy) const {
+    return maybeCopyToAlignedMem(original_blob, original_blob_size, force_copy);
 }
 
 void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob,
-                                                            size_t processed_bytes_count) const {}
+                                                            size_t input_blob_bytes_count) const {}
 
 void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob,
-                                                              size_t processed_bytes_count) const {}
+                                                              size_t input_blob_bytes_count) const {
+}
 
 MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem(
-    const void *original_blob, size_t blob_bytes_count, bool force_copy) const {
+    const void *original_blob, size_t original_blob_size, bool force_copy) const {
     bool needs_copy =
         force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0));
 
     if (needs_copy) {
-        auto aligned_mem = this->allocator->allocate_aligned(blob_bytes_count, this->alignment);
-        // TODO: handle original_blob_size != processed_bytes_count
-        memcpy(aligned_mem, original_blob, blob_bytes_count);
+        auto aligned_mem = this->allocator->allocate_aligned(original_blob_size, this->alignment);
+        memcpy(aligned_mem, original_blob, original_blob_size);
         return this->wrapAllocated(aligned_mem);
     }
 
diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index 5d80a321f..70591bd4b 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -22,19 +22,17 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
     PreprocessorsContainerAbstract(std::shared_ptr<VecSimAllocator> allocator,
                                    unsigned char alignment)
         : VecsimBaseObject(allocator), alignment(alignment) {}
-    virtual ProcessedBlobs preprocess(const void *original_blob,
-                                      size_t processed_bytes_count) const;
+    virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const;
 
     virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
-                                                          size_t processed_bytes_count) const;
+                                                          size_t original_blob_size) const;
 
     virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
-                                                     size_t processed_bytes_count,
+                                                     size_t original_blob_size,
                                                      bool force_copy = false) const;
 
-    virtual void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const;
-
-    virtual void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const;
+    virtual void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const;
+    virtual void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const;
 
     unsigned char getAlignment() const { return alignment; }
 
@@ -43,7 +41,7 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
 
     // Allocate and copy the blob only if the original blob is not aligned.
     MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob,
-                                                   size_t blob_bytes_count,
+                                                   size_t original_blob_size,
                                                    bool force_copy = false) const;
 
     MemoryUtils::unique_blob wrapAllocated(void *blob) const {
@@ -82,19 +80,17 @@ class MultiPreprocessorsContainer : public PreprocessorsContainerAbstract {
      */
     int addPreprocessor(PreprocessorInterface *preprocessor);
 
-    ProcessedBlobs preprocess(const void *original_blob,
-                              size_t processed_bytes_count) const override;
+    ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const override;
 
     MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
-                                                  size_t processed_bytes_count) const override;
+                                                  size_t original_blob_size) const override;
 
-    MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
-                                             size_t processed_bytes_count,
+    MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t original_blob_size,
                                              bool force_copy = false) const override;
 
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const override;
+    void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const override;
 
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override;
+    void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override;
 
 #ifdef BUILD_TESTS
     std::array<PreprocessorInterface *, n_preprocessors> getPreprocessors() const {
@@ -160,11 +156,11 @@ int MultiPreprocessorsContainer<DataType, n_preprocessors>::addPreprocessor(
 
 template <typename DataType, size_t n_preprocessors>
 ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(
-    const void *original_blob, size_t processed_bytes_count) const {
+    const void *original_blob, size_t original_blob_size) const {
     // No preprocessors were added yet.
     if (preprocessors[0] == nullptr) {
         // query might need to be aligned
-        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count);
+        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size);
         return ProcessedBlobs(
             std::move(Base::wrapWithDummyDeleter(const_cast<void *>(original_blob))),
             std::move(query_ptr));
@@ -175,7 +171,7 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocess(original_blob, storage_blob, query_blob, processed_bytes_count,
+        pp->preprocess(original_blob, storage_blob, query_blob, original_blob_size,
                        this->alignment);
     }
     // At least one blob was allocated.
@@ -194,7 +190,7 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
 
     if (query_blob == nullptr) { // we processed only the storage
         // query might need to be aligned
-        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count);
+        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size);
         return ProcessedBlobs(std::move(this->wrapAllocated(storage_blob)), std::move(query_ptr));
     }
 
@@ -206,13 +202,13 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
 template <typename DataType, size_t n_preprocessors>
 MemoryUtils::unique_blob
 MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
-    const void *original_blob, size_t processed_bytes_count) const {
+    const void *original_blob, size_t original_blob_size) const {
 
     void *storage_blob = nullptr;
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+        pp->preprocessForStorage(original_blob, storage_blob, original_blob_size);
     }
 
     return storage_blob ? std::move(this->wrapAllocated(storage_blob))
@@ -221,40 +217,40 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
 
 template <typename DataType, size_t n_preprocessors>
 MemoryUtils::unique_blob MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQuery(
-    const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
+    const void *original_blob, size_t original_blob_size, bool force_copy) const {
 
     void *query_blob = nullptr;
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessQuery(original_blob, query_blob, processed_bytes_count, this->alignment);
+        pp->preprocessQuery(original_blob, query_blob, original_blob_size, this->alignment);
     }
     return query_blob ? std::move(this->wrapAllocated(query_blob))
-                      : std::move(this->maybeCopyToAlignedMem(original_blob, processed_bytes_count,
+                      : std::move(this->maybeCopyToAlignedMem(original_blob, original_blob_size,
                                                               force_copy));
 }
 
 template <typename DataType, size_t n_preprocessors>
 void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQueryInPlace(
-    void *blob, size_t processed_bytes_count) const {
+    void *blob, size_t input_blob_bytes_count) const {
 
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessQueryInPlace(blob, processed_bytes_count, this->alignment);
+        pp->preprocessQueryInPlace(blob, input_blob_bytes_count, this->alignment);
     }
 }
 
 template <typename DataType, size_t n_preprocessors>
 void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessStorageInPlace(
-    void *blob, size_t processed_bytes_count) const {
+    void *blob, size_t input_blob_bytes_count) const {
 
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessStorageInPlace(blob, processed_bytes_count);
+        pp->preprocessStorageInPlace(blob, input_blob_bytes_count);
     }
 }
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index db2de5ad0..d97cf57ca 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -29,29 +29,32 @@ class PreprocessorInterface : public VecsimBaseObject {
     // down the preprocessors pipeline (such as in quantization preprocessor that compresses the
     // vector).
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t processed_bytes_count, unsigned char alignment) const = 0;
+                            size_t original_blob_size, unsigned char alignment) const = 0;
     virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
-                                      size_t processed_bytes_count) const = 0;
+                                      size_t original_blob_size) const = 0;
     virtual void preprocessQuery(const void *original_blob, void *&query_blob,
-                                 size_t processed_bytes_count, unsigned char alignment) const = 0;
-    virtual void preprocessQueryInPlace(void *original_blob, size_t processed_bytes_count,
+                                 size_t original_blob_size, unsigned char alignment) const = 0;
+    virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_bytes_count,
                                         unsigned char alignment) const = 0;
     virtual void preprocessStorageInPlace(void *original_blob,
-                                          size_t processed_bytes_count) const = 0;
+                                          size_t input_blob_bytes_count) const = 0;
 };
 
 template <typename DataType>
 class CosinePreprocessor : public PreprocessorInterface {
 public:
-    CosinePreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
+    // This preprocessor assumes storage_blob and query_blob
+    // both are preprocessed in the same way, and yield a blob in the same size.
+    CosinePreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim,
+                       size_t processed_bytes_count)
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
-          dim(dim) {}
+          dim(dim), processed_bytes_count(processed_bytes_count) {}
 
     // If a blob (storage_blob or query_blob) is not nullptr, it means a previous preprocessor
     // already allocated and processed it. So, we process it inplace. If it's null, we need to
     // allocate memory for it and copy the original_blob to it.
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
+                    size_t original_blob_size, unsigned char alignment) const override {
 
         // Case 1: Blobs are different (one might be null, or both are allocated and processed
         // separately).
@@ -59,12 +62,10 @@ class CosinePreprocessor : public PreprocessorInterface {
             // If one of them is null, allocate memory for it and copy the original_blob to it.
             if (storage_blob == nullptr) {
                 storage_blob = this->allocator->allocate(processed_bytes_count);
-                // TODO: handle original_blob_size != processed_bytes_count
-                memcpy(storage_blob, original_blob, processed_bytes_count);
+                memcpy(storage_blob, original_blob, original_blob_size);
             } else if (query_blob == nullptr) {
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                // TODO: handle original_blob_size != processed_bytes_count
-                memcpy(query_blob, original_blob, processed_bytes_count);
+                memcpy(query_blob, original_blob, original_blob_size);
             }
 
             // Normalize both blobs.
@@ -74,8 +75,7 @@ class CosinePreprocessor : public PreprocessorInterface {
             if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set
                                          // storage_blob to point to it.
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                // TODO: handle original_blob_size != processed_bytes_count
-                memcpy(query_blob, original_blob, processed_bytes_count);
+                memcpy(query_blob, original_blob, original_blob_size);
                 storage_blob = query_blob;
             }
             // normalize one of them (since they point to the same memory).
@@ -84,37 +84,40 @@ class CosinePreprocessor : public PreprocessorInterface {
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
+                              size_t original_blob_size) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate(processed_bytes_count);
-            // TODO: handle original_blob_size != processed_bytes_count
-            memcpy(blob, original_blob, processed_bytes_count);
+            memcpy(blob, original_blob, original_blob_size);
         }
         normalize_func(blob, this->dim);
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t original_blob_size,
                          unsigned char alignment) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            // TODO: handle original_blob_size != processed_bytes_count
-            memcpy(blob, original_blob, processed_bytes_count);
+            memcpy(blob, original_blob, original_blob_size);
         }
         normalize_func(blob, this->dim);
     }
 
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count,
                                 unsigned char alignment) const override {
         assert(blob);
+        // TODO: replace with a debug assert and add values of input_blob_bytes_count and
+        // processed_bytes_count
+        assert(input_blob_bytes_count == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }
 
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {
+    void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override {
         assert(blob);
+        assert(input_blob_bytes_count == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }
 
 private:
     spaces::normalizeVector_f<DataType> normalize_func;
     const size_t dim;
+    const size_t processed_bytes_count;
 };
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 95679a906..5c0f3c0de 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -471,12 +471,15 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
     float value_to_add_storage = 7.0f;
     float value_to_add_query = 2.0f;
     const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
+    // TODo: change this test so that original_blob_size != processed_bytes_count
+    constexpr size_t processed_bytes_count = sizeof(original_blob);
 
     auto multiPPContainer =
         MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
 
     // adding cosine preprocessor
-    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
+    auto cosine_preprocessor =
+        new (allocator) CosinePreprocessor<float>(allocator, dim, processed_bytes_count);
     multiPPContainer.addPreprocessor(cosine_preprocessor);
     {
         ProcessedBlobs processed_blobs =
@@ -536,6 +539,7 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     float value_to_add_storage = 7.0f;
     float value_to_add_query = 2.0f;
     const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
+    constexpr size_t processed_bytes_count = sizeof(original_blob);
 
     // Creating multi preprocessors container
     auto mixed_preprocessor = new (allocator)
@@ -568,7 +572,8 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     }
 
     // adding cosine preprocessor
-    auto cosine_preprocessor = new (allocator) CosinePreprocessor<float>(allocator, dim);
+    auto cosine_preprocessor =
+        new (allocator) CosinePreprocessor<float>(allocator, dim, processed_bytes_count);
     multiPPContainer.addPreprocessor(cosine_preprocessor);
     {
         ProcessedBlobs processed_blobs =

From cc4281a76b906b54f99c90d7bd2a417ec68d8f07 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Thu, 8 May 2025 13:43:23 +0000
Subject: [PATCH 02/22] plan for the tests

---
 tests/unit/test_components.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 5c0f3c0de..57b7e578e 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -602,3 +602,20 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     }
     // The preprocessors should be released by the preprocessors container.
 }
+
+TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+    // add cosine pp that changes the original blob size
+    // add a pp that preprocesses the normalized blob (same size)
+    // add a pp that changes the storage_blob size, but not changing the query_blob size
+}
+
+
+TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+    // add a pp that changes the storage_blob size, but not changing the query_blob size
+    // add a pp that preprocesses the normalized blob (same size)
+    // add cosine pp that changes the original blob size
+}
+
+TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+    // pp multi container where cosine is only needed for the query blob (not supported yet)
+}

From 86a44a967b9f46b72bafd6f3151ec06cf43bb569 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 12 May 2025 10:59:40 +0000
Subject: [PATCH 03/22] rename original_blob_size-> input_blob_size

add DummyChangeAllocSizePreprocessor class to test pp that changes the size

add tests
one will fail because the input blob size is not updated
---
 .../spaces/computer/preprocessor_container.h  |   2 +
 src/VecSim/spaces/computer/preprocessors.h    |  37 ++-
 tests/unit/test_components.cpp                | 280 +++++++++++++++++-
 3 files changed, 284 insertions(+), 35 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index 70591bd4b..80453ce19 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -22,11 +22,13 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
     PreprocessorsContainerAbstract(std::shared_ptr<VecSimAllocator> allocator,
                                    unsigned char alignment)
         : VecsimBaseObject(allocator), alignment(alignment) {}
+    // It is assumed that the resulted query blob is aligned.
     virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const;
 
     virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
                                                           size_t original_blob_size) const;
 
+    // It is assumed that the resulted query blob is aligned.
     virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
                                                      size_t original_blob_size,
                                                      bool force_copy = false) const;
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index d97cf57ca..515a94ef0 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -29,15 +29,14 @@ class PreprocessorInterface : public VecsimBaseObject {
     // down the preprocessors pipeline (such as in quantization preprocessor that compresses the
     // vector).
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t original_blob_size, unsigned char alignment) const = 0;
+                            size_t input_blob_size, unsigned char alignment) const = 0;
     virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
-                                      size_t original_blob_size) const = 0;
+                                      size_t input_blob_size) const = 0;
     virtual void preprocessQuery(const void *original_blob, void *&query_blob,
-                                 size_t original_blob_size, unsigned char alignment) const = 0;
-    virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_bytes_count,
+                                 size_t input_blob_size, unsigned char alignment) const = 0;
+    virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_size,
                                         unsigned char alignment) const = 0;
-    virtual void preprocessStorageInPlace(void *original_blob,
-                                          size_t input_blob_bytes_count) const = 0;
+    virtual void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const = 0;
 };
 
 template <typename DataType>
@@ -54,7 +53,7 @@ class CosinePreprocessor : public PreprocessorInterface {
     // already allocated and processed it. So, we process it inplace. If it's null, we need to
     // allocate memory for it and copy the original_blob to it.
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t original_blob_size, unsigned char alignment) const override {
+                    size_t input_blob_size, unsigned char alignment) const override {
 
         // Case 1: Blobs are different (one might be null, or both are allocated and processed
         // separately).
@@ -62,10 +61,10 @@ class CosinePreprocessor : public PreprocessorInterface {
             // If one of them is null, allocate memory for it and copy the original_blob to it.
             if (storage_blob == nullptr) {
                 storage_blob = this->allocator->allocate(processed_bytes_count);
-                memcpy(storage_blob, original_blob, original_blob_size);
+                memcpy(storage_blob, original_blob, input_blob_size);
             } else if (query_blob == nullptr) {
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                memcpy(query_blob, original_blob, original_blob_size);
+                memcpy(query_blob, original_blob, input_blob_size);
             }
 
             // Normalize both blobs.
@@ -75,7 +74,7 @@ class CosinePreprocessor : public PreprocessorInterface {
             if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set
                                          // storage_blob to point to it.
                 query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                memcpy(query_blob, original_blob, original_blob_size);
+                memcpy(query_blob, original_blob, input_blob_size);
                 storage_blob = query_blob;
             }
             // normalize one of them (since they point to the same memory).
@@ -84,35 +83,35 @@ class CosinePreprocessor : public PreprocessorInterface {
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t original_blob_size) const override {
+                              size_t input_blob_size) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, original_blob_size);
+            memcpy(blob, original_blob, input_blob_size);
         }
         normalize_func(blob, this->dim);
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t original_blob_size,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size,
                          unsigned char alignment) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, original_blob_size);
+            memcpy(blob, original_blob, input_blob_size);
         }
         normalize_func(blob, this->dim);
     }
 
-    void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         assert(blob);
-        // TODO: replace with a debug assert and add values of input_blob_bytes_count and
+        // TODO: replace with a debug assert and add values of input_blob_size and
         // processed_bytes_count
-        assert(input_blob_bytes_count == this->processed_bytes_count);
+        assert(input_blob_size == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }
 
-    void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override {
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
         assert(blob);
-        assert(input_blob_bytes_count == this->processed_bytes_count);
+        assert(input_blob_size == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }
 
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 57b7e578e..99441666f 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -12,6 +12,7 @@
 #include "VecSim/spaces/computer/preprocessor_container.h"
 #include "VecSim/spaces/computer/calculator.h"
 #include "unit_test_utils.h"
+#include "tests_utils.h"
 
 class IndexCalculatorTest : public ::testing::Test {};
 namespace dummyCalcultor {
@@ -57,8 +58,8 @@ enum pp_mode { STORAGE_ONLY, QUERY_ONLY, BOTH, EMPTY };
 template <typename DataType>
 class DummyStoragePreprocessor : public PreprocessorInterface {
 public:
-    DummyStoragePreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                             int value_to_add_query = 0)
+    DummyStoragePreprocessor(std::shared_ptr<VecSimAllocator> allocator,
+                             DataType value_to_add_storage, DataType value_to_add_query = 0)
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(value_to_add_query) {
         if (!value_to_add_query)
@@ -94,8 +95,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface {
     }
 
 private:
-    int value_to_add_storage;
-    int value_to_add_query;
+    DataType value_to_add_storage;
+    DataType value_to_add_query;
 };
 
 // Dummy query preprocessor
@@ -191,6 +192,124 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
     int value_to_add_storage;
     int value_to_add_query;
 };
+
+// TODO: test increase allocation size ( we don't really need another pp class for this)
+// A preprocessor that changes the allocation size of the blobs in the same manner.
+// set excess bytes to (char)2
+template <typename DataType>
+class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
+private:
+    size_t processed_bytes_count;
+    static constexpr unsigned char excess_value = 2;
+
+public:
+    DummyChangeAllocSizePreprocessor(std::shared_ptr<VecSimAllocator> allocator,
+                                     size_t processed_bytes_count)
+        : PreprocessorInterface(allocator), processed_bytes_count(processed_bytes_count) {}
+
+    static constexpr unsigned char getExcessValue() { return excess_value; }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t input_blob_size, unsigned char alignment) const override {
+        // if the blobs are equal,
+        if (storage_blob == query_blob) {
+            preprocessGeneral(original_blob, storage_blob, input_blob_size, alignment);
+            query_blob = storage_blob;
+            return;
+        }
+        // if the blobs are not equal
+        if (input_blob_size < processed_bytes_count) {
+            auto alloc_and_process = [&](void *&blob) {
+                auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                if (blob == nullptr) {
+                    memcpy(new_blob, original_blob, input_blob_size);
+                } else {
+                    // copy the blob to the new blob, and free the old one
+                    memcpy(new_blob, blob, input_blob_size);
+                    this->allocator->free_allocation(blob);
+                }
+                blob = new_blob;
+                memset((char *)blob + input_blob_size, excess_value,
+                       processed_bytes_count - input_blob_size);
+            };
+
+            alloc_and_process(storage_blob);
+            alloc_and_process(query_blob);
+        } else {
+            auto alloc_and_process = [&](void *&blob) {
+                if (blob == nullptr) {
+                    blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                    memcpy(blob, original_blob, processed_bytes_count);
+                } else {
+                    memset((char *)blob + processed_bytes_count, excess_value,
+                           input_blob_size - processed_bytes_count);
+                }
+            };
+
+            alloc_and_process(storage_blob);
+            alloc_and_process(query_blob);
+        }
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t input_blob_size) const override {
+
+        this->preprocessGeneral(original_blob, blob, processed_bytes_count);
+    }
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
+                                unsigned char alignment) const override {
+        // only supported if the blob in already large enough
+        assert(input_blob_size >= processed_bytes_count);
+        // set excess bytes to 0
+        memset((char *)blob + processed_bytes_count, excess_value,
+               input_blob_size - processed_bytes_count);
+    }
+
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
+        // only supported if the blob in already large enough
+        assert(input_blob_size >= processed_bytes_count);
+        // set excess bytes to 0
+        memset((char *)blob + processed_bytes_count, excess_value,
+               input_blob_size - processed_bytes_count);
+    }
+    void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size,
+                         unsigned char alignment) const override {
+        this->preprocessGeneral(original_blob, blob, processed_bytes_count, alignment);
+    }
+
+private:
+    void preprocessGeneral(const void *original_blob, void *&blob, size_t input_blob_size,
+                           unsigned char alignment = 0) const {
+        // if the size of the input is not enough.
+        if (input_blob_size < processed_bytes_count) {
+            // calloc doesn't have an alignment version, so we need to allocate aligned memory and
+            // cset the excess bytes to 0.
+            auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            if (blob == nullptr) {
+                // copy thr original blob
+                memcpy(new_blob, original_blob, input_blob_size);
+            } else {
+                // copy the blob to the new blob
+                memcpy(new_blob, blob, input_blob_size);
+                this->allocator->free_allocation(blob);
+            }
+            blob = new_blob;
+            // set excess bytes to 0
+            memset((char *)blob + input_blob_size, excess_value,
+                   processed_bytes_count - input_blob_size);
+        } else { // input size is larger than output
+            if (blob == nullptr) {
+                blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                memcpy(blob, original_blob, processed_bytes_count);
+            } else {
+                // set excess bytes to 0
+                memset((char *)blob + processed_bytes_count, excess_value,
+                       input_blob_size - processed_bytes_count);
+            }
+        }
+    }
+    // TODO: change** the refernce**  input_blob_size to processed_bytes_count
+};
 } // namespace dummyPreprocessors
 
 TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) {
@@ -207,8 +326,6 @@ TEST(PreprocessorsTest, PreprocessorsTestBasicAlignmentTest) {
         unsigned char address_alignment = (uintptr_t)(aligned_query.get()) % alignment;
         ASSERT_EQ(address_alignment, 0);
     }
-
-    // The index computer is responsible for releasing the distance calculator.
 }
 
 template <unsigned char alignment>
@@ -602,20 +719,151 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     }
     // The preprocessors should be released by the preprocessors container.
 }
+TEST(PreprocessorsTest, decrease_size_STORAGE_then_cosine_no_size_change) {}
+TEST(PreprocessorsTest, decrease_size_QUERY_then_cosine_no_size_change) {}
 
-TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-    // add cosine pp that changes the original blob size
-    // add a pp that preprocesses the normalized blob (same size)
-    // add a pp that changes the storage_blob size, but not changing the query_blob size
+TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 8;
+    constexpr size_t decrease_amount = 2;
+    constexpr size_t new_elem_amount = elements - decrease_amount;
+
+    // valgrind detects out of bound reads only if the considered memory is allocated on the heap,
+    // rather than on the stack.
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    test_utils::populate_float_vec(original_blob, elements);
+    constexpr size_t new_processed_bytes_count =
+        original_blob_size - decrease_amount * sizeof(float);
+
+    // Processed blob expected output
+    float expected_processed_blob[elements] = {0};
+    memcpy(expected_processed_blob, original_blob, new_processed_bytes_count);
+    // Only use half of the blob for normalization
+    VecSim_Normalize(expected_processed_blob, new_elem_amount, VecSimType_FLOAT32);
+
+    // A pp that decreases the original blob size
+    auto decrease_size_preprocessor = new (allocator)
+        DummyChangeAllocSizePreprocessor<float>(allocator, new_processed_bytes_count);
+    // A normalize pp
+    auto cosine_preprocessor = new (allocator)
+        CosinePreprocessor<float>(allocator, new_elem_amount, new_processed_bytes_count);
+    // Creating multi preprocessors container
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(decrease_size_preprocessor);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_EQ(storage_blob, query_blob);
+        ASSERT_NE(storage_blob, nullptr);
+
+        // memory should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+
+        // They need to be allocated and processed
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(static_cast<const float *>(storage_blob),
+                                                      expected_processed_blob, new_elem_amount));
+    }
 }
 
+TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
 
-TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-    // add a pp that changes the storage_blob size, but not changing the query_blob size
-    // add a pp that preprocesses the normalized blob (same size)
-    // add cosine pp that changes the original blob size
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 8;
+
+    // valgrind detects out of bound reads only if the considered memory is allocated on the heap,
+    // rather than on the stack.
+    constexpr size_t original_blob_size = elements * sizeof(int8_t);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    int8_t *original_blob = static_cast<int8_t *>(original_blob_alloc.get());
+    test_utils::populate_int8_vec(original_blob, elements);
+    // size after normalization
+    constexpr size_t normalized_blob_bytes_count = original_blob_size + sizeof(float);
+    // size after increasing pp
+    constexpr size_t elements_addition = 3;
+    constexpr size_t final_blob_bytes_count =
+        normalized_blob_bytes_count + elements_addition * sizeof(unsigned char);
+
+    // Processed blob expected output
+    int8_t expected_processed_blob[elements + sizeof(float) + 3] = {0};
+    memcpy(expected_processed_blob, original_blob, original_blob_size);
+    // normalize the original blob
+    VecSim_Normalize(expected_processed_blob, elements, VecSimType_INT8);
+    // add the values of the pp that increases the blob size
+    unsigned char added_value = DummyChangeAllocSizePreprocessor<int8_t>::getExcessValue();
+    for (size_t i = 0; i < elements_addition; i++) {
+        expected_processed_blob[elements + sizeof(float) + i] = added_value;
+    }
+
+    // A normalize pp - will allocate the blob + sizeof(float)
+    auto cosine_preprocessor = new (allocator)
+        CosinePreprocessor<int8_t>(allocator, elements, normalized_blob_bytes_count);
+    // A pp that will increase the *normalized* blob size
+    auto increase_size_preprocessor =
+        new (allocator) DummyChangeAllocSizePreprocessor<int8_t>(allocator, final_blob_bytes_count);
+    // Creating multi preprocessors container
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+    multiPPContainer.addPreprocessor(increase_size_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_EQ(storage_blob, query_blob);
+        ASSERT_NE(storage_blob, nullptr);
+
+        // memory should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+
+        // They need to be allocated and processed
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<int8_t>(static_cast<const int8_t *>(storage_blob),
+                                                       expected_processed_blob,
+                                                       final_blob_bytes_count));
+    }
 }
 
-TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-    // pp multi container where cosine is only needed for the query blob (not supported yet)
+TEST(PreprocessorsTest, cosine_then_change_size) {
+    // cosine (not changing)
+    // pp that changes the blob size
 }
+
+TEST(PreprocessorsTest, cosine_change_then_pp_change) {
+    // cosine ( changing)
+    // pp that also changes the blob size
+}
+
+// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+//     // add cosine pp that changes the original blob size
+//     // add a pp that preprocesses the normalized blob (same size)
+//     // add a pp that changes the storage_blob size, but not changing the query_blob size
+// }
+
+// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+//     // add a pp that changes the storage_blob size, but not changing the query_blob size
+//     // add a pp that preprocesses the normalized blob (same size)
+//     // add cosine pp that changes the original blob size
+// }
+
+// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
+//     // pp multi container where cosine is only needed for the query blob (not supported yet)
+// }

From 3e15e762d8fa77434b8afab6115cc9a4d80ca037 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 12 May 2025 14:44:28 +0000
Subject: [PATCH 04/22] preprocessors now change the blob size

---
 .../components/components_factory.h           |  3 +-
 .../computer/preprocessor_container.cpp       | 30 ++++------
 .../spaces/computer/preprocessor_container.h  | 56 +++++++++----------
 src/VecSim/spaces/computer/preprocessors.h    | 23 +++++---
 4 files changed, 54 insertions(+), 58 deletions(-)

diff --git a/src/VecSim/index_factories/components/components_factory.h b/src/VecSim/index_factories/components/components_factory.h
index 23d4d93c7..066bd0bca 100644
--- a/src/VecSim/index_factories/components/components_factory.h
+++ b/src/VecSim/index_factories/components/components_factory.h
@@ -31,7 +31,7 @@
  *
  * @details
  * If the metric is Cosine and the data type is integral, the processed bytes count may include
- * additional space for normalization (currently commented out). If the vectors are already
+ * additional space for normalization. If the vectors are already
  * normalized (is_normalized == true), the metric is adjusted to Inner Product (IP) to skip
  * redundant normalization during preprocessing.
  */
@@ -42,7 +42,6 @@ PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric met
     // By default the processed blob size is the same as the original blob size.
     size_t processed_bytes_count = dim * sizeof(DataType);
 
-    // If the index metric is Cosine, and
     VecSimMetric pp_metric = metric;
     if (metric == VecSimMetric_Cosine) {
         // if metric is cosine and DataType is integral, the processed_bytes_count includes the
diff --git a/src/VecSim/spaces/computer/preprocessor_container.cpp b/src/VecSim/spaces/computer/preprocessor_container.cpp
index 9328a7c85..0ec44e36d 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.cpp
+++ b/src/VecSim/spaces/computer/preprocessor_container.cpp
@@ -8,46 +8,38 @@
  */
 #include "VecSim/spaces/computer/preprocessor_container.h"
 
-/**
- *
-=========================== TODO =================
-original_blob_size should be a reference so it can be changed when changes in the pp chain.
-
-
-*/
 ProcessedBlobs PreprocessorsContainerAbstract::preprocess(const void *original_blob,
-                                                          size_t original_blob_size) const {
-    return ProcessedBlobs(preprocessForStorage(original_blob, original_blob_size),
-                          preprocessQuery(original_blob, original_blob_size));
+                                                          size_t input_blob_size) const {
+    return ProcessedBlobs(preprocessForStorage(original_blob, input_blob_size),
+                          preprocessQuery(original_blob, input_blob_size));
 }
 
 MemoryUtils::unique_blob
 PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob,
-                                                     size_t original_blob_size) const {
+                                                     size_t input_blob_size) const {
     return wrapWithDummyDeleter(const_cast<void *>(original_blob));
 }
 
 MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob,
-                                                                         size_t original_blob_size,
+                                                                         size_t input_blob_size,
                                                                          bool force_copy) const {
-    return maybeCopyToAlignedMem(original_blob, original_blob_size, force_copy);
+    return maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy);
 }
 
 void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob,
-                                                            size_t input_blob_bytes_count) const {}
+                                                            size_t input_blob_size) const {}
 
 void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob,
-                                                              size_t input_blob_bytes_count) const {
-}
+                                                              size_t input_blob_size) const {}
 
 MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem(
-    const void *original_blob, size_t original_blob_size, bool force_copy) const {
+    const void *original_blob, size_t input_blob_size, bool force_copy) const {
     bool needs_copy =
         force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0));
 
     if (needs_copy) {
-        auto aligned_mem = this->allocator->allocate_aligned(original_blob_size, this->alignment);
-        memcpy(aligned_mem, original_blob, original_blob_size);
+        auto aligned_mem = this->allocator->allocate_aligned(input_blob_size, this->alignment);
+        memcpy(aligned_mem, original_blob, input_blob_size);
         return this->wrapAllocated(aligned_mem);
     }
 
diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index 80453ce19..cd2f0dec8 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -23,18 +23,18 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
                                    unsigned char alignment)
         : VecsimBaseObject(allocator), alignment(alignment) {}
     // It is assumed that the resulted query blob is aligned.
-    virtual ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const;
+    virtual ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const;
 
     virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
-                                                          size_t original_blob_size) const;
+                                                          size_t input_blob_size) const;
 
     // It is assumed that the resulted query blob is aligned.
     virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
-                                                     size_t original_blob_size,
+                                                     size_t input_blob_size,
                                                      bool force_copy = false) const;
 
-    virtual void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const;
-    virtual void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const;
+    virtual void preprocessQueryInPlace(void *blob, size_t input_blob_size) const;
+    virtual void preprocessStorageInPlace(void *blob, size_t input_blob_size) const;
 
     unsigned char getAlignment() const { return alignment; }
 
@@ -43,7 +43,7 @@ class PreprocessorsContainerAbstract : public VecsimBaseObject {
 
     // Allocate and copy the blob only if the original blob is not aligned.
     MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob,
-                                                   size_t original_blob_size,
+                                                   size_t input_blob_size,
                                                    bool force_copy = false) const;
 
     MemoryUtils::unique_blob wrapAllocated(void *blob) const {
@@ -82,17 +82,17 @@ class MultiPreprocessorsContainer : public PreprocessorsContainerAbstract {
      */
     int addPreprocessor(PreprocessorInterface *preprocessor);
 
-    ProcessedBlobs preprocess(const void *original_blob, size_t original_blob_size) const override;
+    ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const override;
 
     MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
-                                                  size_t original_blob_size) const override;
+                                                  size_t input_blob_size) const override;
 
-    MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t original_blob_size,
+    MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t input_blob_size,
                                              bool force_copy = false) const override;
 
-    void preprocessQueryInPlace(void *blob, size_t input_blob_bytes_count) const override;
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size) const override;
 
-    void preprocessStorageInPlace(void *blob, size_t input_blob_bytes_count) const override;
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override;
 
 #ifdef BUILD_TESTS
     std::array<PreprocessorInterface *, n_preprocessors> getPreprocessors() const {
@@ -157,12 +157,13 @@ int MultiPreprocessorsContainer<DataType, n_preprocessors>::addPreprocessor(
 }
 
 template <typename DataType, size_t n_preprocessors>
-ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(
-    const void *original_blob, size_t original_blob_size) const {
+ProcessedBlobs
+MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *original_blob,
+                                                                   size_t input_blob_size) const {
     // No preprocessors were added yet.
     if (preprocessors[0] == nullptr) {
         // query might need to be aligned
-        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size);
+        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size);
         return ProcessedBlobs(
             std::move(Base::wrapWithDummyDeleter(const_cast<void *>(original_blob))),
             std::move(query_ptr));
@@ -173,8 +174,7 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocess(original_blob, storage_blob, query_blob, original_blob_size,
-                       this->alignment);
+        pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
     }
     // At least one blob was allocated.
 
@@ -192,7 +192,7 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
 
     if (query_blob == nullptr) { // we processed only the storage
         // query might need to be aligned
-        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, original_blob_size);
+        auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size);
         return ProcessedBlobs(std::move(this->wrapAllocated(storage_blob)), std::move(query_ptr));
     }
 
@@ -204,13 +204,13 @@ ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preproces
 template <typename DataType, size_t n_preprocessors>
 MemoryUtils::unique_blob
 MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
-    const void *original_blob, size_t original_blob_size) const {
+    const void *original_blob, size_t input_blob_size) const {
 
     void *storage_blob = nullptr;
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocessForStorage(original_blob, storage_blob, original_blob_size);
+        pp->preprocessForStorage(original_blob, storage_blob, input_blob_size);
     }
 
     return storage_blob ? std::move(this->wrapAllocated(storage_blob))
@@ -219,40 +219,40 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
 
 template <typename DataType, size_t n_preprocessors>
 MemoryUtils::unique_blob MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQuery(
-    const void *original_blob, size_t original_blob_size, bool force_copy) const {
+    const void *original_blob, size_t input_blob_size, bool force_copy) const {
 
     void *query_blob = nullptr;
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessQuery(original_blob, query_blob, original_blob_size, this->alignment);
+        pp->preprocessQuery(original_blob, query_blob, input_blob_size, this->alignment);
     }
-    return query_blob ? std::move(this->wrapAllocated(query_blob))
-                      : std::move(this->maybeCopyToAlignedMem(original_blob, original_blob_size,
-                                                              force_copy));
+    return query_blob
+               ? std::move(this->wrapAllocated(query_blob))
+               : std::move(this->maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy));
 }
 
 template <typename DataType, size_t n_preprocessors>
 void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQueryInPlace(
-    void *blob, size_t input_blob_bytes_count) const {
+    void *blob, size_t input_blob_size) const {
 
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessQueryInPlace(blob, input_blob_bytes_count, this->alignment);
+        pp->preprocessQueryInPlace(blob, input_blob_size, this->alignment);
     }
 }
 
 template <typename DataType, size_t n_preprocessors>
 void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessStorageInPlace(
-    void *blob, size_t input_blob_bytes_count) const {
+    void *blob, size_t input_blob_size) const {
 
     for (auto pp : preprocessors) {
         if (!pp)
             break;
         // modifies the memory in place
-        pp->preprocessStorageInPlace(blob, input_blob_bytes_count);
+        pp->preprocessStorageInPlace(blob, input_blob_size);
     }
 }
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 515a94ef0..b1bb4932e 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -28,12 +28,14 @@ class PreprocessorInterface : public VecsimBaseObject {
     // TODO: handle a dynamic processed_bytes_count, as the allocation size of the blob might change
     // down the preprocessors pipeline (such as in quantization preprocessor that compresses the
     // vector).
+    // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
+    // are the same size.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t input_blob_size, unsigned char alignment) const = 0;
+                            size_t &input_blob_size, unsigned char alignment) const = 0;
     virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
-                                      size_t input_blob_size) const = 0;
+                                      size_t &input_blob_size) const = 0;
     virtual void preprocessQuery(const void *original_blob, void *&query_blob,
-                                 size_t input_blob_size, unsigned char alignment) const = 0;
+                                 size_t &input_blob_size, unsigned char alignment) const = 0;
     virtual void preprocessQueryInPlace(void *original_blob, size_t input_blob_size,
                                         unsigned char alignment) const = 0;
     virtual void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const = 0;
@@ -53,8 +55,7 @@ class CosinePreprocessor : public PreprocessorInterface {
     // already allocated and processed it. So, we process it inplace. If it's null, we need to
     // allocate memory for it and copy the original_blob to it.
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t input_blob_size, unsigned char alignment) const override {
-
+                    size_t &input_blob_size, unsigned char alignment) const override {
         // Case 1: Blobs are different (one might be null, or both are allocated and processed
         // separately).
         if (storage_blob != query_blob) {
@@ -80,31 +81,35 @@ class CosinePreprocessor : public PreprocessorInterface {
             // normalize one of them (since they point to the same memory).
             normalize_func(query_blob, this->dim);
         }
+
+        input_blob_size = processed_bytes_count;
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t input_blob_size) const override {
+                              size_t &input_blob_size) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate(processed_bytes_count);
             memcpy(blob, original_blob, input_blob_size);
         }
         normalize_func(blob, this->dim);
+        input_blob_size = processed_bytes_count;
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
         if (blob == nullptr) {
             blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
             memcpy(blob, original_blob, input_blob_size);
         }
         normalize_func(blob, this->dim);
+        input_blob_size = processed_bytes_count;
     }
 
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         assert(blob);
-        // TODO: replace with a debug assert and add values of input_blob_size and
-        // processed_bytes_count
+        // TODO: replace with a debug assert and log the exact values of input_blob_size and
+        // processed_bytes_count to improve observability
         assert(input_blob_size == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }

From 1863722a77914b8bfaeb6dc1b95d8186cf3efcb8 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 12 May 2025 14:46:01 +0000
Subject: [PATCH 05/22] fix test

---
 tests/unit/test_components.cpp | 80 ++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 37 deletions(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 99441666f..a483e75fa 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -67,29 +67,29 @@ class DummyStoragePreprocessor : public PreprocessorInterface {
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
+                    size_t &input_blob_size, unsigned char alignment) const override {
 
-        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
+        this->preprocessForStorage(original_blob, storage_blob, input_blob_size);
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
+                              size_t &input_blob_size) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate(input_blob_size);
+            memcpy(blob, original_blob, input_blob_size);
         }
         static_cast<DataType *>(blob)[0] += value_to_add_storage;
     }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {}
 
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
         assert(blob);
         static_cast<DataType *>(blob)[0] += value_to_add_storage;
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
         /* do nothing*/
     }
@@ -112,25 +112,25 @@ class DummyQueryPreprocessor : public PreprocessorInterface {
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
-        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+                    size_t &input_blob_size, unsigned char alignment) const override {
+        this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment);
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
+                              size_t &input_blob_size) const override {
         /* do nothing*/
     }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         static_cast<DataType *>(blob)[0] += value_to_add_query;
     }
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {}
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate_aligned(input_blob_size, alignment);
+            memcpy(blob, original_blob, input_blob_size);
         }
         static_cast<DataType *>(blob)[0] += value_to_add_query;
     }
@@ -149,41 +149,41 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(value_to_add_query) {}
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
+                    size_t &input_blob_size, unsigned char alignment) const override {
 
         // One blob was already allocated by a previous preprocessor(s) that process both blobs the
         // same. The blobs are pointing to the same memory, we need to allocate another memory slot
         // to split them.
         if ((storage_blob == query_blob) && (query_blob != nullptr)) {
-            storage_blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(storage_blob, query_blob, processed_bytes_count);
+            storage_blob = this->allocator->allocate(input_blob_size);
+            memcpy(storage_blob, query_blob, input_blob_size);
         }
 
         // Either both are nullptr or they are pointing to different memory slots. Both cases are
         // handled by the designated functions.
-        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
-        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+        this->preprocessForStorage(original_blob, storage_blob, input_blob_size);
+        this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment);
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
+                              size_t &input_blob_size) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate(input_blob_size);
+            memcpy(blob, original_blob, input_blob_size);
         }
         static_cast<DataType *>(blob)[0] += value_to_add_storage;
     }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {}
 
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {}
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {}
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate_aligned(input_blob_size, alignment);
+            memcpy(blob, original_blob, input_blob_size);
         }
         static_cast<DataType *>(blob)[0] += value_to_add_query;
     }
@@ -210,14 +210,16 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
     static constexpr unsigned char getExcessValue() { return excess_value; }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t input_blob_size, unsigned char alignment) const override {
+                    size_t &input_blob_size, unsigned char alignment) const override {
         // if the blobs are equal,
         if (storage_blob == query_blob) {
             preprocessGeneral(original_blob, storage_blob, input_blob_size, alignment);
             query_blob = storage_blob;
             return;
         }
-        // if the blobs are not equal
+        // The blobs are not equal
+
+        // If the input blob size is not enough
         if (input_blob_size < processed_bytes_count) {
             auto alloc_and_process = [&](void *&blob) {
                 auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
@@ -249,12 +251,15 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
             alloc_and_process(storage_blob);
             alloc_and_process(query_blob);
         }
+
+        // update the input blob size
+        input_blob_size = processed_bytes_count;
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t input_blob_size) const override {
+                              size_t &input_blob_size) const override {
 
-        this->preprocessGeneral(original_blob, blob, processed_bytes_count);
+        this->preprocessGeneral(original_blob, blob, input_blob_size);
     }
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
@@ -272,13 +277,13 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
         memset((char *)blob + processed_bytes_count, excess_value,
                input_blob_size - processed_bytes_count);
     }
-    void preprocessQuery(const void *original_blob, void *&blob, size_t input_blob_size,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
-        this->preprocessGeneral(original_blob, blob, processed_bytes_count, alignment);
+        this->preprocessGeneral(original_blob, blob, input_blob_size, alignment);
     }
 
 private:
-    void preprocessGeneral(const void *original_blob, void *&blob, size_t input_blob_size,
+    void preprocessGeneral(const void *original_blob, void *&blob, size_t &input_blob_size,
                            unsigned char alignment = 0) const {
         // if the size of the input is not enough.
         if (input_blob_size < processed_bytes_count) {
@@ -307,8 +312,9 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
                        input_blob_size - processed_bytes_count);
             }
         }
+        // update the input blob size
+        input_blob_size = processed_bytes_count;
     }
-    // TODO: change** the refernce**  input_blob_size to processed_bytes_count
 };
 } // namespace dummyPreprocessors
 

From 55837baadd00c82a013421d12fdadb80c85a61a7 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Mon, 12 May 2025 14:47:21 +0000
Subject: [PATCH 06/22] fix tiered test

---
 tests/unit/test_hnsw_tiered.cpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index a6540dba5..0b7157c0c 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -4205,52 +4205,52 @@ class PreprocessorDoubleValue : public PreprocessorInterface {
     PreprocessorDoubleValue(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim) {}
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t processed_bytes_count, unsigned char alignment) const override {
+                    size_t &input_blob_size, unsigned char alignment) const override {
 
         // One blob was already allocated by a previous preprocessor(s) that process both blobs the
         // same. The blobs are pointing to the same memory, we need to allocate another memory slot
         // to split them.
         if ((storage_blob == query_blob) && (query_blob != nullptr)) {
-            storage_blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(storage_blob, query_blob, processed_bytes_count);
+            storage_blob = this->allocator->allocate(input_blob_size);
+            memcpy(storage_blob, query_blob, input_blob_size);
         }
 
         // Either both are nullptr or they are pointing to different memory slots. Both cases are
         // handled by the designated functions.
-        this->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
-        this->preprocessQuery(original_blob, query_blob, processed_bytes_count, alignment);
+        this->preprocessForStorage(original_blob, storage_blob, input_blob_size);
+        this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment);
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
-                              size_t processed_bytes_count) const override {
+                              size_t &input_blob_size) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate(input_blob_size);
+            memcpy(blob, original_blob, input_blob_size);
         }
         for (size_t i = 0; i < dim; i++) {
             static_cast<DataType *>(blob)[i] *= 2;
         }
     }
-    void preprocessQueryInPlace(void *blob, size_t processed_bytes_count,
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         for (size_t i = 0; i < dim; i++) {
             static_cast<DataType *>(blob)[i] *= 2;
         }
     }
 
-    void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override {
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
         for (size_t i = 0; i < dim; i++) {
             static_cast<DataType *>(blob)[i] *= 2;
         }
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t processed_bytes_count,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
         // If the blob was not allocated yet, allocate it.
         if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, processed_bytes_count);
+            blob = this->allocator->allocate_aligned(input_blob_size, alignment);
+            memcpy(blob, original_blob, input_blob_size);
         }
         for (size_t i = 0; i < dim; i++) {
             static_cast<DataType *>(blob)[i] *= 2;

From b1699adf4e27ffebbe103c785d2d3a6d0fb96428 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sat, 17 May 2025 04:50:17 +0000
Subject: [PATCH 07/22] add assert storage_blob == nullptr || input_blob_size
 == processed_bytes_count

add valgrind macro

value_to_add of DummyQueryPreprocessor and DummyMixedPreprocessor is now DataType instead of int
---
 Makefile                                   |   1 +
 src/VecSim/spaces/computer/preprocessors.h |  22 ++-
 tests/unit/CMakeLists.txt                  |   6 +
 tests/unit/test_components.cpp             | 219 ++++++++++++++++-----
 4 files changed, 192 insertions(+), 56 deletions(-)

diff --git a/Makefile b/Makefile
index 450194de8..3fe149f0d 100644
--- a/Makefile
+++ b/Makefile
@@ -177,6 +177,7 @@ ifeq ($(VALGRIND),1)
 _CTEST_ARGS += \
 	-T memcheck \
 	--overwrite MemoryCheckCommandOptions="--leak-check=full --fair-sched=yes --error-exitcode=255"
+CMAKE_FLAGS += -DUSE_VALGRIND=ON
 endif
 
 unit_test:
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index b1bb4932e..c9271c088 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -44,18 +44,25 @@ class PreprocessorInterface : public VecsimBaseObject {
 template <typename DataType>
 class CosinePreprocessor : public PreprocessorInterface {
 public:
-    // This preprocessor assumes storage_blob and query_blob
-    // both are preprocessed in the same way, and yield a blob in the same size.
+    // This preprocessor requires that storage_blob and query_blob have identical memory sizes
+    // both before processing (as input) and after preprocessing completes.
     CosinePreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim,
                        size_t processed_bytes_count)
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
           dim(dim), processed_bytes_count(processed_bytes_count) {}
 
-    // If a blob (storage_blob or query_blob) is not nullptr, it means a previous preprocessor
-    // already allocated and processed it. So, we process it inplace. If it's null, we need to
-    // allocate memory for it and copy the original_blob to it.
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
+        // This assert verifies that if a blob was allocated by a previous preprocessor, its
+        // size matches our expected processed size. Therefore, it is safe to skip re-allocation and
+        // process it inplace. Supporting dynamic resizing would require additional size checks (if
+        // statements) and memory management logic, which could impact performance. Currently, no
+        // code path requires this capability. If resizing becomes necessary in the future, remove
+        // the assertions and implement appropriate allocation handling with performance
+        // considerations.
+        assert(storage_blob == nullptr || input_blob_size == processed_bytes_count);
+        assert(query_blob == nullptr || input_blob_size == processed_bytes_count);
+
         // Case 1: Blobs are different (one might be null, or both are allocated and processed
         // separately).
         if (storage_blob != query_blob) {
@@ -87,6 +94,9 @@ class CosinePreprocessor : public PreprocessorInterface {
 
     void preprocessForStorage(const void *original_blob, void *&blob,
                               size_t &input_blob_size) const override {
+        // see assert docs in preprocess
+        assert(blob == nullptr || input_blob_size == processed_bytes_count);
+
         if (blob == nullptr) {
             blob = this->allocator->allocate(processed_bytes_count);
             memcpy(blob, original_blob, input_blob_size);
@@ -97,6 +107,8 @@ class CosinePreprocessor : public PreprocessorInterface {
 
     void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
                          unsigned char alignment) const override {
+        // see assert docs in preprocess
+        assert(blob == nullptr || input_blob_size == processed_bytes_count);
         if (blob == nullptr) {
             blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
             memcpy(blob, original_blob, input_blob_size);
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 8767e4190..dde8874f4 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -28,6 +28,12 @@ if(FP64_TESTS)
 	add_definitions(-DFP64_TESTS)
 endif()
 
+option(USE_VALGRIND "Building for Valgrind" OFF)
+if(USE_VALGRIND)
+    add_definitions(-DRUNNING_ON_VALGRIND)
+    message(STATUS "Building with RUNNING_ON_VALGRIND definition for Valgrind compatibility")
+endif()
+
 if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)")
 	include(${root}/cmake/aarch64InstructionFlags.cmake)
 else()
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index a483e75fa..f74a55846 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -103,8 +103,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface {
 template <typename DataType>
 class DummyQueryPreprocessor : public PreprocessorInterface {
 public:
-    DummyQueryPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                           int _value_to_add_query = 0)
+    DummyQueryPreprocessor(std::shared_ptr<VecSimAllocator> allocator,
+                           DataType value_to_add_storage, DataType _value_to_add_query = 0)
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(_value_to_add_query) {
         if (!_value_to_add_query)
@@ -136,16 +136,16 @@ class DummyQueryPreprocessor : public PreprocessorInterface {
     }
 
 private:
-    int value_to_add_storage;
-    int value_to_add_query;
+    DataType value_to_add_storage;
+    DataType value_to_add_query;
 };
 
 // Dummy mixed preprocessor (precesses the blobs  differently)
 template <typename DataType>
 class DummyMixedPreprocessor : public PreprocessorInterface {
 public:
-    DummyMixedPreprocessor(std::shared_ptr<VecSimAllocator> allocator, int value_to_add_storage,
-                           int value_to_add_query)
+    DummyMixedPreprocessor(std::shared_ptr<VecSimAllocator> allocator,
+                           DataType value_to_add_storage, DataType value_to_add_query)
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(value_to_add_query) {}
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
@@ -189,8 +189,8 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
     }
 
 private:
-    int value_to_add_storage;
-    int value_to_add_query;
+    DataType value_to_add_storage;
+    DataType value_to_add_query;
 };
 
 // TODO: test increase allocation size ( we don't really need another pp class for this)
@@ -391,7 +391,7 @@ void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) {
     int initial_value = 1;
     int value_to_add = 7;
     const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
+    size_t original_blob_size = sizeof(original_blob);
 
     // Test computer with multiple preprocessors of the same type.
     auto multiPPContainer =
@@ -399,7 +399,7 @@ void MultiPreprocessorsContainerNoAlignment(dummyPreprocessors::pp_mode MODE) {
 
     auto verify_preprocess = [&](int expected_processed_value) {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+            multiPPContainer.preprocess(original_blob, original_blob_size);
         // Original blob should not be changed
         ASSERT_EQ(original_blob[0], initial_value);
 
@@ -455,7 +455,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() {
     int value_to_add_storage = 7;
     int value_to_add_query = 2;
     const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
+    size_t original_blob_size = sizeof(original_blob);
 
     // Test multiple preprocessors of the same type.
     auto multiPPContainer =
@@ -472,7 +472,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() {
     // scope this section so the blobs are released before the allocator.
     {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+            multiPPContainer.preprocess(original_blob, original_blob_size);
         // Original blob should not be changed
         ASSERT_EQ(original_blob[0], initial_value);
 
@@ -497,7 +497,7 @@ void multiPPContainerMixedPreprocessorNoAlignment() {
     ASSERT_EQ(multiPPContainer.addPreprocessor(preprocessor2), 0);
     {
         ProcessedBlobs mixed_processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+            multiPPContainer.preprocess(original_blob, original_blob_size);
 
         const void *mixed_pp_storage_blob = mixed_processed_blobs.getStorageBlob();
         const void *mixed_pp_query_blob = mixed_processed_blobs.getQueryBlob();
@@ -534,14 +534,14 @@ void multiPPContainerAlignment(dummyPreprocessors::pp_mode MODE) {
     int initial_value = 1;
     int value_to_add = 7;
     const int original_blob[4] = {initial_value, initial_value, initial_value, initial_value};
-    size_t processed_bytes_count = sizeof(original_blob);
+    size_t original_blob_size = sizeof(original_blob);
 
     auto multiPPContainer =
         MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
 
     auto verify_preprocess = [&](int expected_processed_value) {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, processed_bytes_count);
+            multiPPContainer.preprocess(original_blob, original_blob_size);
 
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
@@ -594,19 +594,18 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
     float value_to_add_storage = 7.0f;
     float value_to_add_query = 2.0f;
     const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
-    // TODo: change this test so that original_blob_size != processed_bytes_count
-    constexpr size_t processed_bytes_count = sizeof(original_blob);
+    constexpr size_t original_blob_size = sizeof(original_blob);
 
     auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
 
     // adding cosine preprocessor
     auto cosine_preprocessor =
-        new (allocator) CosinePreprocessor<float>(allocator, dim, processed_bytes_count);
+        new (allocator) CosinePreprocessor<float>(allocator, dim, original_blob_size);
     multiPPContainer.addPreprocessor(cosine_preprocessor);
     {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+            multiPPContainer.preprocess(original_blob, original_blob_size);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
         // blobs should point to the same memory slot
@@ -626,7 +625,7 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
     multiPPContainer.addPreprocessor(mixed_preprocessor);
     {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+            multiPPContainer.preprocess(original_blob, original_blob_size);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
         // blobs should point to a different memory slot
@@ -649,6 +648,7 @@ TEST(PreprocessorsTest, multiPPContainerCosineThenMixedPreprocess) {
     // The preprocessors should be released by the preprocessors container.
 }
 
+// Cosine pp receives different allocation for the storage and query blobs.
 TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     using namespace dummyPreprocessors;
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
@@ -657,23 +657,28 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
     constexpr size_t dim = 4;
     unsigned char alignment = 5;
 
-    float initial_value = 1.0f;
-    float normalized_value = 0.5f;
-    float value_to_add_storage = 7.0f;
-    float value_to_add_query = 2.0f;
-    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
-    constexpr size_t processed_bytes_count = sizeof(original_blob);
-
+    // In this test the first preprocessor allocates the memory for both blobs, according to the
+    // size passed by the pp container. The second preprocessor expects that if the blobs are
+    // already allocated, their size matches its processed_bytes_count. Hence, the blob size should
+    // be sufficient for the cosine preprocessing.
+    constexpr int8_t normalized_blob_bytes_count = dim * sizeof(int8_t) + sizeof(float);
+    auto blob_alloc = allocator->allocate_unique(normalized_blob_bytes_count);
+    int8_t *original_blob = static_cast<int8_t *>(blob_alloc.get());
+    test_utils::populate_int8_vec(original_blob, dim);
+
+    // Processing params
+    int8_t value_to_add_storage = 7;
+    int8_t value_to_add_query = 4;
     // Creating multi preprocessors container
     auto mixed_preprocessor = new (allocator)
-        DummyMixedPreprocessor<float>(allocator, value_to_add_storage, value_to_add_query);
+        DummyMixedPreprocessor<int8_t>(allocator, value_to_add_storage, value_to_add_query);
     auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+        MultiPreprocessorsContainer<int8_t, n_preprocessors>(allocator, alignment);
     multiPPContainer.addPreprocessor(mixed_preprocessor);
 
     {
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+            multiPPContainer.preprocess(original_blob, normalized_blob_bytes_count);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
         // blobs should point to a different memory slot
@@ -685,22 +690,46 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
         unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
         ASSERT_EQ(address_alignment, 0);
 
-        // They need to be processed by both processors.
-        ASSERT_EQ(((const float *)storage_blob)[0], initial_value + value_to_add_storage);
-        ASSERT_EQ(((const float *)query_blob)[0], initial_value + value_to_add_query);
+        // Both blobs were processed.
+        ASSERT_EQ(((const int8_t *)storage_blob)[0], original_blob[0] + value_to_add_storage);
+        ASSERT_EQ(((const int8_t *)query_blob)[0], original_blob[0] + value_to_add_query);
 
         // the original blob should not change
         ASSERT_NE(storage_blob, original_blob);
         ASSERT_NE(query_blob, original_blob);
     }
 
-    // adding cosine preprocessor
+    // Processed blob expected output
+    auto expected_processed_blob = [&](int8_t *blob, int8_t value_to_add) {
+        memcpy(blob, original_blob, normalized_blob_bytes_count);
+        blob[0] += value_to_add;
+        VecSim_Normalize(blob, dim, VecSimType_INT8);
+    };
+
+    int8_t expected_processed_storage[normalized_blob_bytes_count] = {0};
+    expected_processed_blob(expected_processed_storage, value_to_add_storage);
+    int8_t expected_processed_query[normalized_blob_bytes_count] = {0};
+    expected_processed_blob(expected_processed_query, value_to_add_query);
+
+    // normalize the original blob
     auto cosine_preprocessor =
-        new (allocator) CosinePreprocessor<float>(allocator, dim, processed_bytes_count);
+        new (allocator) CosinePreprocessor<int8_t>(allocator, dim, normalized_blob_bytes_count);
     multiPPContainer.addPreprocessor(cosine_preprocessor);
     {
+        // An assertion should be raised by the cosine preprocessor for unmatching blob sizes.
+        // in valgrind the test continues, but the assertion appears in its log looking like an
+        // error, so to avoid confusion we skip this line in valgrind.
+#ifndef RUNNING_ON_VALGRIND
+        EXPECT_EXIT(
+            {
+                ProcessedBlobs processed_blobs = multiPPContainer.preprocess(
+                    original_blob, normalized_blob_bytes_count - sizeof(float));
+            },
+            testing::KilledBySignal(SIGABRT), "input_blob_size == processed_bytes_count");
+#endif
+        // Use the correct size
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+            multiPPContainer.preprocess(original_blob, normalized_blob_bytes_count);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
         // blobs should point to a different memory slot
@@ -711,23 +740,106 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
         // They need to be allocated and processed
         ASSERT_NE(storage_blob, nullptr);
         ASSERT_NE(query_blob, nullptr);
-        float expected_processed_storage[dim] = {initial_value + value_to_add_storage,
-                                                 initial_value, initial_value, initial_value};
-        float expected_processed_query[dim] = {initial_value + value_to_add_query, initial_value,
-                                               initial_value, initial_value};
-        VecSim_Normalize(expected_processed_storage, dim, VecSimType_FLOAT32);
-        VecSim_Normalize(expected_processed_query, dim, VecSimType_FLOAT32);
-        ASSERT_EQ(((const float *)storage_blob)[0], expected_processed_storage[0]);
-        ASSERT_EQ(((const float *)query_blob)[0], expected_processed_query[0]);
         // the original blob should not change
         ASSERT_NE(storage_blob, original_blob);
         ASSERT_NE(query_blob, original_blob);
+
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<int8_t>(static_cast<const int8_t *>(storage_blob),
+                                                       expected_processed_storage,
+                                                       normalized_blob_bytes_count));
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<int8_t>(static_cast<const int8_t *>(query_blob),
+                                                       expected_processed_query,
+                                                       normalized_blob_bytes_count));
     }
     // The preprocessors should be released by the preprocessors container.
 }
-TEST(PreprocessorsTest, decrease_size_STORAGE_then_cosine_no_size_change) {}
-TEST(PreprocessorsTest, decrease_size_QUERY_then_cosine_no_size_change) {}
 
+template <typename PreprocessorType>
+void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) {
+    using namespace dummyPreprocessors;
+
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t dim = 4;
+    unsigned char alignment = 5;
+
+    float original_blob[dim] = {0};
+    size_t original_blob_size = dim * sizeof(float);
+    test_utils::populate_float_vec(original_blob, dim);
+
+    // Processing params
+    float value_to_add_storage = 0;
+    float value_to_add_query = 0;
+    if (MODE == STORAGE_ONLY) {
+        value_to_add_storage = 7;
+    } else if (MODE == QUERY_ONLY) {
+        value_to_add_query = 4;
+    }
+
+    // Processed blob expected output
+    auto expected_processed_blob = [&](float *blob, float value_to_add) {
+        memcpy(blob, original_blob, original_blob_size);
+        blob[0] += value_to_add;
+        VecSim_Normalize(blob, dim, VecSimType_FLOAT32);
+    };
+
+    float expected_processed_storage[original_blob_size] = {0};
+    expected_processed_blob(expected_processed_storage, value_to_add_storage);
+    float expected_processed_query[original_blob_size] = {0};
+    expected_processed_blob(expected_processed_query, value_to_add_query);
+
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    // will allocate either the storage or the query blob, depending on the preprocessor type.
+    // TODO : can we pass just value_to_add?
+    auto asymmetric_preprocessor =
+        new (allocator) PreprocessorType(allocator, value_to_add_storage, value_to_add_query);
+    auto cosine_preprocessor =
+        new (allocator) CosinePreprocessor<float>(allocator, dim, original_blob_size);
+
+    multiPPContainer.addPreprocessor(asymmetric_preprocessor);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to a different memory slot
+        ASSERT_NE(storage_blob, query_blob);
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+
+        // query blob should be aligned
+        unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+        ASSERT_EQ(address_alignment, 0);
+
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(static_cast<const float *>(storage_blob),
+                                                      expected_processed_storage, dim));
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<float>(static_cast<const float *>(query_blob),
+                                                      expected_processed_query, dim));
+    }
+}
+// The first preprocessor in the chain allocates only the storage blob,
+// and the responsibility of allocating the query blob is delegated to the next preprocessor
+// in the chain (cosine preprocessor in this case).
+TEST(PreprocessorsTest, STORAGE_then_cosine_no_size_change) {
+    using namespace dummyPreprocessors;
+    EXPECT_NO_FATAL_FAILURE(
+        AsymmetricPPThenCosine<DummyStoragePreprocessor<float>>(pp_mode::STORAGE_ONLY));
+}
+
+TEST(PreprocessorsTest, QUERY_then_cosine_no_size_change) {
+    using namespace dummyPreprocessors;
+    EXPECT_NO_FATAL_FAILURE(
+        AsymmetricPPThenCosine<DummyQueryPreprocessor<float>>(pp_mode::QUERY_ONLY));
+}
+
+// A test where the value of input_blob_size is modified by the first pp.
+// The cosine pp processed_bytes_count should be set at initialization with the *modified* value,
+// otherwise, an assertion will be raised by it for an allocated blob that is smaller than the
+// expected size.
 TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) {
     using namespace dummyPreprocessors;
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
@@ -761,7 +873,7 @@ TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) {
         CosinePreprocessor<float>(allocator, new_elem_amount, new_processed_bytes_count);
     // Creating multi preprocessors container
     auto multiPPContainer =
-        MultiPreprocessorsContainer<DummyType, n_preprocessors>(allocator, alignment);
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
     multiPPContainer.addPreprocessor(decrease_size_preprocessor);
     multiPPContainer.addPreprocessor(cosine_preprocessor);
 
@@ -784,13 +896,17 @@ TEST(PreprocessorsTest, DecreaseSizeThenFloatNormalize) {
     }
 }
 
+// In this test the original blob size is smaller than the processed_bytes_count of the
+// cosine preprocessor. Before the bug fix, the cosine pp would try to copy processed_bytes_count
+// bytes of the original blob to the allocated memory, which would cause an out of bound read,
+// that should be detected by valgrind.
 TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
     using namespace dummyPreprocessors;
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
 
     constexpr size_t n_preprocessors = 2;
     constexpr size_t alignment = 5;
-    constexpr size_t elements = 8;
+    constexpr size_t elements = 7;
 
     // valgrind detects out of bound reads only if the considered memory is allocated on the heap,
     // rather than on the stack.
@@ -806,7 +922,7 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
         normalized_blob_bytes_count + elements_addition * sizeof(unsigned char);
 
     // Processed blob expected output
-    int8_t expected_processed_blob[elements + sizeof(float) + 3] = {0};
+    int8_t expected_processed_blob[elements + sizeof(float) + elements_addition] = {0};
     memcpy(expected_processed_blob, original_blob, original_blob_size);
     // normalize the original blob
     VecSim_Normalize(expected_processed_blob, elements, VecSimType_INT8);
@@ -829,8 +945,9 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
     multiPPContainer.addPreprocessor(increase_size_preprocessor);
 
     {
+        // Note: we pass the original blob size to detect out of bound reads in the cosine pp.
         ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, sizeof(original_blob));
+            multiPPContainer.preprocess(original_blob, original_blob_size);
         const void *storage_blob = processed_blobs.getStorageBlob();
         const void *query_blob = processed_blobs.getQueryBlob();
         // blobs should point to the same memory slot

From 6dc543df53daf8dba8bef5d22e102fc4222c8a76 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sat, 17 May 2025 14:35:44 +0000
Subject: [PATCH 08/22] enable assert only in debug

---
 tests/unit/test_components.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index f74a55846..6314cfb79 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -719,7 +719,7 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
         // An assertion should be raised by the cosine preprocessor for unmatching blob sizes.
         // in valgrind the test continues, but the assertion appears in its log looking like an
         // error, so to avoid confusion we skip this line in valgrind.
-#ifndef RUNNING_ON_VALGRIND
+#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG)
         EXPECT_EXIT(
             {
                 ProcessedBlobs processed_blobs = multiPPContainer.preprocess(

From 3e673b7b6eb1a3a5994eb144c85e91a35b01a8bb Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sat, 17 May 2025 14:39:15 +0000
Subject: [PATCH 09/22] use constexpr for blob size

---
 tests/unit/test_components.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 6314cfb79..ce7a0ddd7 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -765,7 +765,7 @@ void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) {
     unsigned char alignment = 5;
 
     float original_blob[dim] = {0};
-    size_t original_blob_size = dim * sizeof(float);
+    constexpr size_t original_blob_size = dim * sizeof(float);
     test_utils::populate_float_vec(original_blob, dim);
 
     // Processing params

From 8967d40b237ce4a4c2afb932d60f6c161a8a6119 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Sun, 18 May 2025 11:42:31 +0000
Subject: [PATCH 10/22] small docs changes

---
 src/VecSim/spaces/computer/preprocessors.h | 9 ---------
 tests/unit/CMakeLists.txt                  | 2 +-
 tests/unit/test_components.cpp             | 1 -
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index c9271c088..88c34506e 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -17,17 +17,10 @@
 #include "VecSim/spaces/spaces.h"
 #include "VecSim/memory/memory_utils.h"
 
-// TODO: Handle processed_bytes_count that might change down the preprocessors pipeline.
-// The preprocess function calls a pipeline of preprocessors, one of which can be a quantization
-// preprocessor. In such cases, the quantization preprocessor compresses the vector, resulting in a
-// change in the allocation size.
 class PreprocessorInterface : public VecsimBaseObject {
 public:
     PreprocessorInterface(std::shared_ptr<VecSimAllocator> allocator)
         : VecsimBaseObject(allocator) {}
-    // TODO: handle a dynamic processed_bytes_count, as the allocation size of the blob might change
-    // down the preprocessors pipeline (such as in quantization preprocessor that compresses the
-    // vector).
     // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
     // are the same size.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
@@ -120,8 +113,6 @@ class CosinePreprocessor : public PreprocessorInterface {
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         assert(blob);
-        // TODO: replace with a debug assert and log the exact values of input_blob_size and
-        // processed_bytes_count to improve observability
         assert(input_blob_size == this->processed_bytes_count);
         normalize_func(blob, this->dim);
     }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index dde8874f4..515ab2ae1 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -31,7 +31,7 @@ endif()
 option(USE_VALGRIND "Building for Valgrind" OFF)
 if(USE_VALGRIND)
     add_definitions(-DRUNNING_ON_VALGRIND)
-    message(STATUS "Building with RUNNING_ON_VALGRIND definition for Valgrind compatibility")
+    message(STATUS "Building with RUNNING_ON_VALGRIND")
 endif()
 
 if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv8)|(armv9)")
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index ce7a0ddd7..6c596fb08 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -193,7 +193,6 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
     DataType value_to_add_query;
 };
 
-// TODO: test increase allocation size ( we don't really need another pp class for this)
 // A preprocessor that changes the allocation size of the blobs in the same manner.
 // set excess bytes to (char)2
 template <typename DataType>

From 674b136b4e8c814d400993c5abbe964b76cbc453 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 27 May 2025 07:47:34 +0000
Subject: [PATCH 11/22] review fixes

---
 tests/unit/test_components.cpp | 57 +++++++++-------------------------
 1 file changed, 14 insertions(+), 43 deletions(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 6c596fb08..5fefe006a 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -218,9 +218,9 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
         }
         // The blobs are not equal
 
-        // If the input blob size is not enough
-        if (input_blob_size < processed_bytes_count) {
-            auto alloc_and_process = [&](void *&blob) {
+        auto alloc_and_process = [&](void *&blob) {
+            // If the input blob size is not enough
+            if (input_blob_size < processed_bytes_count) {
                 auto new_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
                 if (blob == nullptr) {
                     memcpy(new_blob, original_blob, input_blob_size);
@@ -232,12 +232,7 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
                 blob = new_blob;
                 memset((char *)blob + input_blob_size, excess_value,
                        processed_bytes_count - input_blob_size);
-            };
-
-            alloc_and_process(storage_blob);
-            alloc_and_process(query_blob);
-        } else {
-            auto alloc_and_process = [&](void *&blob) {
+            } else {
                 if (blob == nullptr) {
                     blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
                     memcpy(blob, original_blob, processed_bytes_count);
@@ -245,11 +240,11 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
                     memset((char *)blob + processed_bytes_count, excess_value,
                            input_blob_size - processed_bytes_count);
                 }
-            };
+            }
+        };
 
-            alloc_and_process(storage_blob);
-            alloc_and_process(query_blob);
-        }
+        alloc_and_process(storage_blob);
+        alloc_and_process(query_blob);
 
         // update the input blob size
         input_blob_size = processed_bytes_count;
@@ -690,8 +685,10 @@ TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
         ASSERT_EQ(address_alignment, 0);
 
         // Both blobs were processed.
-        ASSERT_EQ(((const int8_t *)storage_blob)[0], original_blob[0] + value_to_add_storage);
-        ASSERT_EQ(((const int8_t *)query_blob)[0], original_blob[0] + value_to_add_query);
+        ASSERT_EQ((static_cast<const int8_t *>(storage_blob))[0],
+                  original_blob[0] + value_to_add_storage);
+        ASSERT_EQ((static_cast<const int8_t *>(query_blob))[0],
+                  original_blob[0] + value_to_add_query);
 
         // the original blob should not change
         ASSERT_NE(storage_blob, original_blob);
@@ -783,9 +780,9 @@ void AsymmetricPPThenCosine(dummyPreprocessors::pp_mode MODE) {
         VecSim_Normalize(blob, dim, VecSimType_FLOAT32);
     };
 
-    float expected_processed_storage[original_blob_size] = {0};
+    float expected_processed_storage[dim] = {0};
     expected_processed_blob(expected_processed_storage, value_to_add_storage);
-    float expected_processed_query[original_blob_size] = {0};
+    float expected_processed_query[dim] = {0};
     expected_processed_blob(expected_processed_query, value_to_add_query);
 
     auto multiPPContainer =
@@ -963,29 +960,3 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
                                                        final_blob_bytes_count));
     }
 }
-
-TEST(PreprocessorsTest, cosine_then_change_size) {
-    // cosine (not changing)
-    // pp that changes the blob size
-}
-
-TEST(PreprocessorsTest, cosine_change_then_pp_change) {
-    // cosine ( changing)
-    // pp that also changes the blob size
-}
-
-// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-//     // add cosine pp that changes the original blob size
-//     // add a pp that preprocesses the normalized blob (same size)
-//     // add a pp that changes the storage_blob size, but not changing the query_blob size
-// }
-
-// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-//     // add a pp that changes the storage_blob size, but not changing the query_blob size
-//     // add a pp that preprocesses the normalized blob (same size)
-//     // add cosine pp that changes the original blob size
-// }
-
-// TEST(PreprocessorsTest, multiPPContainerMixedThenCosinePreprocess) {
-//     // pp multi container where cosine is only needed for the query blob (not supported yet)
-// }

From d529f5ebbe9cf1908b7adf2902cc9ccef773f495 Mon Sep 17 00:00:00 2001
From: meiravgri <meirav.grimberg@redis.com>
Date: Tue, 27 May 2025 07:59:06 +0000
Subject: [PATCH 12/22] =?UTF-8?q?=D7=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/unit/test_components.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 5fefe006a..f6eabd3c8 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -925,7 +925,7 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
     // add the values of the pp that increases the blob size
     unsigned char added_value = DummyChangeAllocSizePreprocessor<int8_t>::getExcessValue();
     for (size_t i = 0; i < elements_addition; i++) {
-        expected_processed_blob[elements + sizeof(float) + i] = added_value;
+        expected_processed_blob[elements + sizeof(float) + i] = static_cast<int8_t>(added_value);
     }
 
     // A normalize pp - will allocate the blob + sizeof(float)

From af11142218c3e0ee32dd58e46d02feb16a534457 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Wed, 28 May 2025 10:28:01 +0300
Subject: [PATCH 13/22] notes and changes

---
 src/VecSim/spaces/computer/preprocessors.h | 190 ++++++++++++++++++++-
 1 file changed, 189 insertions(+), 1 deletion(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 88c34506e..2b46e09d7 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -44,8 +44,9 @@ class CosinePreprocessor : public PreprocessorInterface {
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
           dim(dim), processed_bytes_count(processed_bytes_count) {}
 
+    // TODO: add storage_blob_size and query_blob_size parameters
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t &input_blob_size, unsigned char alignment) const override {
+                    size_t &storage_blob_size,  unsigned char alignment) const override {
         // This assert verifies that if a blob was allocated by a previous preprocessor, its
         // size matches our expected processed size. Therefore, it is safe to skip re-allocation and
         // process it inplace. Supporting dynamic resizing would require additional size checks (if
@@ -128,3 +129,190 @@ class CosinePreprocessor : public PreprocessorInterface {
     const size_t dim;
     const size_t processed_bytes_count;
 };
+
+
+// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision of uint8_t.
+// The quantization is done by finding the minimum and maximum values of the input vector, and then
+// scaling the values to fit in the range of [0, 255]. The quantized values are then stored in a uint8_t array.
+// [Quantized values, min, delta]
+// Quantized Blob size  =  dim_elements * sizeof(int8)  +  2 * sizeof(float)
+// delta = (max_val - min_val) / 255.0f
+// quantized_v[i] = (v[i] - min_val) / delta
+// preprocessForStorage:
+// if null:
+//      - We are not reallocing because it will be released after the query.
+//      Allocate quantized blob size
+// 3. Compute (min, delta) and quantize to the quantized blob or in place.
+// preprocessQuery: No-op – queries arrive as float32 and remain uncompressed
+
+
+// preprocess -> 
+// if storage_blob == null || storage_blob == query_blob:
+//      allocate storage blob with storage_blob_size
+// edge case:
+// Check if the input size is not big enough and if not, reallocate it, and update the size. -add this scenario to tests
+// quantize it the storage_blob
+
+
+// add class member const size_t storage_bytes_count, and calculate it as follows:
+// storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float);
+
+
+template <typename DataType>
+class QuantPreprocessor : public PreprocessorInterface {
+public:
+    QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim,
+                      size_t processed_bytes_count)
+        : PreprocessorInterface(allocator), dim(dim), processed_bytes_count(processed_bytes_count) {}
+
+    // Helper function to perform quantization
+    void quantize(const float* input, uint8_t* output, float& min_val, float& delta) const {
+        // Find min and max values
+        min_val = input[0];
+        float max_val = input[0];
+        for (size_t i = 1; i < dim; i++) {
+            min_val = std::min(min_val, input[i]);
+            max_val = std::max(max_val, input[i]);
+        }
+        
+        // Calculate scaling factor
+        delta = (max_val - min_val) / 255.0f;
+        
+        // Quantize values
+        if (delta > 0) {
+            for (size_t i = 0; i < dim; i++) {
+                output[i] = static_cast<uint8_t>((input[i] - min_val) / delta);
+            }
+        } else {
+            // Handle case where all values are the same
+            memset(output, 0, dim);
+        }
+    }
+
+    // Calculate the norm of the dequantized vector
+    float calculateDequantizedNorm(const uint8_t* quantized, const float min_val, const float delta) const {
+        float norm = 0.0f;
+        for (size_t i = 0; i < dim; i++) {
+            float dequantized = min_val + quantized[i] * delta;
+            norm += dequantized * dequantized;
+        }
+        return norm > 0 ? 1.0f / std::sqrt(norm) : 0.0f;
+    }
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &input_blob_size, unsigned char alignment) const override {
+        // This assert verifies that if a blob was allocated by a previous preprocessor, its
+        // size matches our expected processed size. Therefore, it is safe to skip re-allocation and
+        // process it inplace. Supporting dynamic resizing would require additional size checks (if
+        // statements) and memory management logic, which could impact performance. Currently, no
+        // code path requires this capability. If resizing becomes necessary in the future, remove
+        // the assertions and implement appropriate allocation handling with performance
+        // considerations.
+        assert(storage_blob == nullptr || input_blob_size == processed_bytes_count);
+        assert(query_blob == nullptr || input_blob_size == processed_bytes_count);
+        // Case 1: Blobs are different (one might be null, or both are allocated and processed
+        // separately).
+        if (storage_blob != query_blob) {
+            // If one of them is null, allocate memory for it and copy the original_blob to it.
+            if (storage_blob == nullptr) {
+                storage_blob = this->allocator->allocate(processed_bytes_count);
+                memcpy(storage_blob, original_blob, input_blob_size);
+            } else if (query_blob == nullptr) {
+                query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                memcpy(query_blob, original_blob, input_blob_size);
+            }
+        } else { // Case 2: Blobs are the same (either both are null or processed in the same way).
+            if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set
+                                         // storage_blob to point to it.
+                query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+                memcpy(query_blob, original_blob, input_blob_size);
+                storage_blob = query_blob;
+            }
+        }
+        input_blob_size = processed_bytes_count;
+    }
+
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t &input_blob_size) const override {
+        // see assert docs in preprocess
+        assert(blob == nullptr || input_blob_size == processed_bytes_count);
+
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(processed_bytes_count);
+            memcpy(blob, original_blob, input_blob_size);
+        }
+        
+        // Only quantize if input is float32 and we're quantizing to uint8_t
+        if constexpr (std::is_same_v<DataType, float>) {
+            // Cast to appropriate types
+            const float* input = static_cast<const float*>(original_blob);
+            uint8_t* quantized = static_cast<uint8_t*>(blob);
+            
+            // Reserve space for metadata at the end of the blob
+            // [quantized values, min_val, delta, inverted_norm]
+            float* metadata = reinterpret_cast<float*>(quantized + dim);
+            
+            // Perform quantization
+            float min_val, delta;
+            quantize(input, quantized, min_val, delta);
+            
+            // Store min_val and delta in the metadata
+            metadata[0] = min_val;
+            metadata[1] = delta;
+            
+            // Calculate and store the inverted norm for cosine similarity
+            metadata[2] = calculateDequantizedNorm(quantized, min_val, delta);
+        }
+        
+        input_blob_size = processed_bytes_count;
+    }
+
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
+                         unsigned char alignment) const override {
+        // see assert docs in preprocess
+        assert(blob == nullptr || input_blob_size == processed_bytes_count);
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
+            memcpy(blob, original_blob, input_blob_size);
+        }
+        input_blob_size = processed_bytes_count;
+    }
+
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
+                                unsigned char alignment) const override {
+        assert(blob);
+        assert(input_blob_size == this->processed_bytes_count);
+    }
+
+    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
+        assert(blob);
+        assert(input_blob_size == this->processed_bytes_count);
+        
+        if constexpr (std::is_same_v<DataType, float>) {
+            // Cast to appropriate types
+            float* input = static_cast<float*>(blob);
+            uint8_t* quantized = static_cast<uint8_t*>(blob);
+            
+            // Create temporary copy of input data
+            std::vector<float> temp_input(input, input + dim);
+            
+            // Reserve space for metadata at the end of the blob
+            float* metadata = reinterpret_cast<float*>(quantized + dim);
+            
+            // Perform quantization
+            float min_val, delta;
+            quantize(temp_input.data(), quantized, min_val, delta);
+            
+            // Store min_val and delta in the metadata
+            metadata[0] = min_val;
+            metadata[1] = delta;
+            
+            // Calculate and store the inverted norm for cosine similarity
+            metadata[2] = calculateDequantizedNorm(quantized, min_val, delta);
+        }
+    }
+
+private:
+    const size_t dim;
+    const size_t processed_bytes_count;
+};

From eacd40f1bbe4512fccac582c69fb4944dbe99f4d Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Thu, 29 May 2025 17:10:23 +0300
Subject: [PATCH 14/22] Added tests and changes to the PP

---
 .../spaces/computer/preprocessor_container.h  |   8 +-
 src/VecSim/spaces/computer/preprocessors.h    | 223 +++----
 tests/unit/test_components.cpp                | 624 +++++++++++++++++-
 tests/unit/test_hnsw_tiered.cpp               |  11 +
 4 files changed, 735 insertions(+), 131 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index cd2f0dec8..41728de66 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -171,10 +171,16 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o
 
     void *storage_blob = nullptr;
     void *query_blob = nullptr;
+
+    // Sepreated variables for the storage blob size and query_blob_size, 
+    // in case we need to change their sizes to different values.
+    size_t storage_blob_size = input_blob_size;
+    size_t query_blob_size = input_blob_size;
+
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
+        pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size, this->alignment);
     }
     // At least one blob was allocated.
 
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 8c8112916..725bba098 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -12,6 +12,7 @@
 #include <cstddef>
 #include <memory>
 #include <cassert>
+#include <cmath>
 
 #include "VecSim/memory/vecsim_base.h"
 #include "VecSim/spaces/spaces.h"
@@ -23,8 +24,13 @@ class PreprocessorInterface : public VecsimBaseObject {
         : VecsimBaseObject(allocator) {}
     // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
     // are the same size.
+    //TODO: Add query_blob_size as a parameter to the preprocess functions, to allow
+    // different sizes for storage and query blobs in the future, if needed.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                             size_t &input_blob_size, unsigned char alignment) const = 0;
+    virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const = 0;
     virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
                                       size_t &input_blob_size) const = 0;
     virtual void preprocessQuery(const void *original_blob, void *&query_blob,
@@ -44,6 +50,20 @@ class CosinePreprocessor : public PreprocessorInterface {
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
           dim(dim), processed_bytes_count(processed_bytes_count) {}
 
+
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
+        // This assert verifies that that the current use of this function is for blobs of the same
+        // size, which is the case for the Cosine preprocessor. If we ever need to support different
+        // sizes for storage and query blobs, we can remove the assert and implement the logic to
+        // handle different sizes.
+        assert(storage_blob_size == query_blob_size);
+
+        preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
+        query_blob_size = storage_blob_size; // Ensure both blobs have the same size after processing.
+    }
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
         // This assert verifies that if a blob was allocated by a previous preprocessor, its
@@ -145,173 +165,118 @@ class CosinePreprocessor : public PreprocessorInterface {
 // preprocessQuery: No-op – queries arrive as float32 and remain uncompressed
 
 
-// preprocess -> 
-// if storage_blob == null || storage_blob == query_blob:
-//      allocate storage blob with storage_blob_size
-// edge case:
-// Check if the input size is not big enough and if not, reallocate it, and update the size. -add this scenario to tests
-// quantize it the storage_blob
-
-
-// add class member const size_t storage_bytes_count, and calculate it as follows:
-// storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float);
-
-
-template <typename DataType>
 class QuantPreprocessor : public PreprocessorInterface {
 public:
-    QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim,
-                      size_t processed_bytes_count)
-        : PreprocessorInterface(allocator), dim(dim), processed_bytes_count(processed_bytes_count) {}
+    // Constructor for backward compatibility (single blob size)
+    QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
+        : PreprocessorInterface(allocator), dim(dim),
+          storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {} // quantized + min + delta + inverted_norm {}
+
 
     // Helper function to perform quantization
-    void quantize(const float* input, uint8_t* output, float& min_val, float& delta) const {
+    void quantize(const float* input, uint8_t* quantized) const {
+        assert(dim > 0);
         // Find min and max values
-        min_val = input[0];
+        float min_val = input[0];
         float max_val = input[0];
-        for (size_t i = 1; i < dim; i++) {
+        for (size_t i = 1; i < this->dim; i++) {
             min_val = std::min(min_val, input[i]);
             max_val = std::max(max_val, input[i]);
         }
-        
+
         // Calculate scaling factor
-        delta = (max_val - min_val) / 255.0f;
-        
-        // Quantize values
-        if (delta > 0) {
-            for (size_t i = 0; i < dim; i++) {
-                output[i] = static_cast<uint8_t>((input[i] - min_val) / delta);
-            }
-        } else {
-            // Handle case where all values are the same
-            memset(output, 0, dim);
+        const float diff = (max_val - min_val);
+        const float delta = diff == 0.0f ? 1.0f : diff / 255.0f;
+        const float inv_delta = 1.0f / delta;
+
+        // Quantize the values
+        for (size_t i = 0; i < this->dim; i++) {
+            quantized[i] = static_cast<uint8_t>(std::round((input[i] - min_val) * inv_delta));
         }
+
+        // Reserve space for metadata at the end of the blob
+        // [quantized values, min_val, delta, inverted_norm]
+        float* metadata = reinterpret_cast<float*>(quantized + this->dim);
+
+        // Store min_val, delta, and inverted_norm in the metadata
+        metadata[0] = min_val;
+        metadata[1] = delta;
     }
 
-    // Calculate the norm of the dequantized vector
-    float calculateDequantizedNorm(const uint8_t* quantized, const float min_val, const float delta) const {
-        float norm = 0.0f;
-        for (size_t i = 0; i < dim; i++) {
-            float dequantized = min_val + quantized[i] * delta;
-            norm += dequantized * dequantized;
-        }
-        return norm > 0 ? 1.0f / std::sqrt(norm) : 0.0f;
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &input_blob_size, unsigned char alignment) const override {
+        // For backward compatibility - both blobs have the same size
+        preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, alignment);
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                    size_t &input_blob_size, unsigned char alignment) const override {
-        // This assert verifies that if a blob was allocated by a previous preprocessor, its
-        // size matches our expected processed size. Therefore, it is safe to skip re-allocation and
-        // process it inplace. Supporting dynamic resizing would require additional size checks (if
-        // statements) and memory management logic, which could impact performance. Currently, no
-        // code path requires this capability. If resizing becomes necessary in the future, remove
-        // the assertions and implement appropriate allocation handling with performance
-        // considerations.
-        assert(storage_blob == nullptr || input_blob_size == processed_bytes_count);
-        assert(query_blob == nullptr || input_blob_size == processed_bytes_count);
-        // Case 1: Blobs are different (one might be null, or both are allocated and processed
-        // separately).
-        if (storage_blob != query_blob) {
-            // If one of them is null, allocate memory for it and copy the original_blob to it.
-            if (storage_blob == nullptr) {
-                storage_blob = this->allocator->allocate(processed_bytes_count);
-                memcpy(storage_blob, original_blob, input_blob_size);
-            } else if (query_blob == nullptr) {
-                query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                memcpy(query_blob, original_blob, input_blob_size);
-            }
-        } else { // Case 2: Blobs are the same (either both are null or processed in the same way).
-            if (query_blob == nullptr) { // If both blobs are null, allocate query_blob and set
-                                         // storage_blob to point to it.
-                query_blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-                memcpy(query_blob, original_blob, input_blob_size);
-                storage_blob = query_blob;
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const override {
+
+
+        if (storage_blob)
+        {
+            if (storage_blob == query_blob) {
+                storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+            } else if (storage_blob_size < storage_bytes_count) {
+                // Check if the blob allocated by a previous preprocessor is big enough, otherwise, realloc it.
+                // Can happen when the dim is smaller than the quantization metadata.
+                // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 bytes.
+                // But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = 10 bytes.
+                storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count);
             }
+
         }
-        input_blob_size = processed_bytes_count;
+        else {
+            // storage_blob is nullptr, so we need to allocate new memory
+            storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+        }
+        storage_blob_size = this->storage_bytes_count;
+
+        // Cast to appropriate types
+        const float* input = static_cast<const float*>(original_blob);
+        uint8_t* quantized = static_cast<uint8_t*>(storage_blob);
+        quantize(input, quantized);
+
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
                               size_t &input_blob_size) const override {
-        // see assert docs in preprocess
-        assert(blob == nullptr || input_blob_size == processed_bytes_count);
-
+        // Allocate quantized blob if needed
         if (blob == nullptr) {
-            blob = this->allocator->allocate(processed_bytes_count);
-            memcpy(blob, original_blob, input_blob_size);
-        }
-        
-        // Only quantize if input is float32 and we're quantizing to uint8_t
-        if constexpr (std::is_same_v<DataType, float>) {
-            // Cast to appropriate types
-            const float* input = static_cast<const float*>(original_blob);
-            uint8_t* quantized = static_cast<uint8_t*>(blob);
-            
-            // Reserve space for metadata at the end of the blob
-            // [quantized values, min_val, delta, inverted_norm]
-            float* metadata = reinterpret_cast<float*>(quantized + dim);
-            
-            // Perform quantization
-            float min_val, delta;
-            quantize(input, quantized, min_val, delta);
-            
-            // Store min_val and delta in the metadata
-            metadata[0] = min_val;
-            metadata[1] = delta;
-            
-            // Calculate and store the inverted norm for cosine similarity
-            metadata[2] = calculateDequantizedNorm(quantized, min_val, delta);
+            blob = this->allocator->allocate(storage_bytes_count);
         }
-        
-        input_blob_size = processed_bytes_count;
+
+        // Cast to appropriate types
+        const float* input = static_cast<const float*>(original_blob);
+        uint8_t* quantized = static_cast<uint8_t*>(blob);
+        quantize(input, quantized);
+
+        input_blob_size = storage_bytes_count;
     }
 
-    void preprocessQuery(const void *original_blob, void *&blob, size_t &input_blob_size,
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
                          unsigned char alignment) const override {
-        // see assert docs in preprocess
-        assert(blob == nullptr || input_blob_size == processed_bytes_count);
+        // No-op: queries remain as float32
         if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(processed_bytes_count, alignment);
-            memcpy(blob, original_blob, input_blob_size);
+            blob = this->allocator->allocate_aligned(query_blob_size, alignment);
+            memcpy(blob, original_blob, query_blob_size);
         }
-        input_blob_size = processed_bytes_count;
     }
 
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
+        // No-op: queries remain as float32
         assert(blob);
-        assert(input_blob_size == this->processed_bytes_count);
     }
 
-    void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override {
-        assert(blob);
-        assert(input_blob_size == this->processed_bytes_count);
-        
-        if constexpr (std::is_same_v<DataType, float>) {
-            // Cast to appropriate types
-            float* input = static_cast<float*>(blob);
-            uint8_t* quantized = static_cast<uint8_t*>(blob);
-            
-            // Create temporary copy of input data
-            std::vector<float> temp_input(input, input + dim);
-            
-            // Reserve space for metadata at the end of the blob
-            float* metadata = reinterpret_cast<float*>(quantized + dim);
-            
-            // Perform quantization
-            float min_val, delta;
-            quantize(temp_input.data(), quantized, min_val, delta);
-            
-            // Store min_val and delta in the metadata
-            metadata[0] = min_val;
-            metadata[1] = delta;
-            
-            // Calculate and store the inverted norm for cosine similarity
-            metadata[2] = calculateDequantizedNorm(quantized, min_val, delta);
-        }
+    void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
+        // This function is unused for this preprocessor.
+        assert(original_blob);
+        assert(input_blob_size >= storage_bytes_count);
     }
 
 private:
     const size_t dim;
-    const size_t processed_bytes_count;
+    const size_t storage_bytes_count;
 };
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index f6eabd3c8..14dd78898 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -10,6 +10,7 @@
 #include "gtest/gtest.h"
 #include "VecSim/vec_sim.h"
 #include "VecSim/spaces/computer/preprocessor_container.h"
+#include "VecSim/spaces/computer/preprocessors.h"
 #include "VecSim/spaces/computer/calculator.h"
 #include "unit_test_utils.h"
 #include "tests_utils.h"
@@ -65,7 +66,16 @@ class DummyStoragePreprocessor : public PreprocessorInterface {
         if (!value_to_add_query)
             value_to_add_query = value_to_add_storage;
     }
-
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
+        // This assert verifies that there's no use for this function for now - different sizes for
+        // storage and query blobs. If such a use case arises, we can remove the assert and implement
+        // the logic to handle different sizes.
+        assert(storage_blob_size == query_blob_size);
+
+        preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
+    }
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
 
@@ -111,6 +121,17 @@ class DummyQueryPreprocessor : public PreprocessorInterface {
             value_to_add_query = value_to_add_storage;
     }
 
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const override {
+        // This assert verifies that there's no use for this function for now - different sizes for
+        // storage and query blobs. If such a use case arises, we can remove the assert and implement
+        // the logic to handle different sizes.
+        assert(storage_blob_size == query_blob_size);
+
+        preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
+    }
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
         this->preprocessQuery(original_blob, query_blob, input_blob_size, alignment);
@@ -148,6 +169,12 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
                            DataType value_to_add_storage, DataType value_to_add_query)
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(value_to_add_query) {}
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const override {
+        preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
+    }
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
 
@@ -208,6 +235,19 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
 
     static constexpr unsigned char getExcessValue() { return excess_value; }
 
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const override {
+        // if the blobs are equal,
+        if (storage_blob == query_blob) {
+            preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment);
+            query_blob = storage_blob;
+            query_blob_size = storage_blob_size;
+            return;
+        }
+    }
+
+            // If the input blob size is not enough
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
         // if the blobs are equal,
@@ -960,3 +1000,585 @@ TEST(PreprocessorsTest, Int8NormalizeThenIncreaseSize) {
                                                        final_blob_bytes_count));
     }
 }
+
+// Tests the quantization preprocessor with a single preprocessor in the chain.
+// The QuantPreprocessor allocates the storage blob and processes it, while the query blob
+// is unprocessed and allocated by the preprocessors container.
+TEST(PreprocessorsTest, QuantizationTest) {
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t n_preprocessors = 1;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 7;
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    for (size_t i = 0; i < elements; i++) {
+        original_blob[i] = static_cast<float>(i);
+    }
+    
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
+
+        constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float);
+        uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
+        quant_preprocessor->quantize(original_blob, expected_processed_blob);
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
+                                                       expected_processed_blob,
+                                                       quantized_blob_bytes_count));
+
+    }
+
+}
+
+// Tests the quantization preprocessor with a cosine preprocessor in the chain.
+// The QuantPreprocessor receives an allocated blob from the cosine preprocessor.
+TEST(PreprocessorsTest, QuantizationTestWithCosine) {
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 7;
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    for (size_t i = 0; i < elements; i++) {
+        original_blob[i] = static_cast<float>(i);
+    }
+    
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto cosine_preprocessor =
+        new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
+
+        float expected_processed_blob[elements] = {0};
+        memcpy(expected_processed_blob, original_blob, original_blob_size);
+        VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32);
+        uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0};
+        // quantization should be applied after normalization
+        quant_preprocessor->quantize(expected_processed_blob, quantized_blob);
+        // compare the storage blob to the expected processed blob
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
+                                                       quantized_blob,
+                                                       elements));
+
+    }
+
+}
+
+// Tests the quantization preprocessor with a single preprocessor in the chain.
+// The QuantPreprocessor allocates the storage blob with a size larger than the original blob.
+
+TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) {
+    // Checks that if not enough memory was allocated by a previous preprocessor, the quantization
+    // preprocessor will reallocate it.
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t n_preprocessors = 1;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 2;
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    for (size_t i = 0; i < elements; i++) {
+        original_blob[i] = static_cast<float>(i+10);
+    }
+    
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
+
+        constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float);
+        uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
+        quant_preprocessor->quantize(original_blob, expected_processed_blob);
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
+                                                       expected_processed_blob,
+                                                       quantized_blob_bytes_count));
+    }
+
+}
+
+// Tests the quantization preprocessor with a cosine preprocessor in the chain.
+// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to reallocate it.
+// because the original blob size is smaller than the processed_bytes_count of the cosine preprocessor.
+TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
+    // Checks that if not enough memory was allocated by a previous preprocessor, the quantization
+    // preprocessor will reallocate it.
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 2;
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    for (size_t i = 0; i < elements; i++) {
+        original_blob[i] = static_cast<float>(i+2.5f);
+    }
+    
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto cosine_preprocessor =
+        new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(cosine_preprocessor);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(query_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
+
+        float expected_processed_blob[elements] = {0};
+        memcpy(expected_processed_blob, original_blob, original_blob_size);
+        VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32);
+        uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0};
+        // quantization should be applied after normalization
+        quant_preprocessor->quantize(expected_processed_blob, quantized_blob);
+        // compare the storage blob to the expected processed blob
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
+                                                       quantized_blob,
+                                                       elements));
+
+    }
+
+}
+
+
+// TEST(PreprocessorsTest, CosineThenQuantizeTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+
+//     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
+//     auto cosine_preprocessor =
+//         new (allocator) CosinePreprocessor<float>(allocator, dim, dim * sizeof(float));
+
+//     auto multiPPContainer =
+//         MultiPreprocessorsContainer<float, 2>(allocator, 0);
+//     multiPPContainer.addPreprocessor(cosine_preprocessor);
+//     multiPPContainer.addPreprocessor(quant_preprocessor);
+//     // Test input vector with known values
+//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
+//     const size_t input_blob_size = dim * sizeof(float);
+//     // Allocate output buffer
+//     auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size);
+//     const void* quantized_blob = processed_blobs.getStorageBlob();
+
+
+// }
+    
+
+
+// TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 3;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     // Test with uniform values (edge case where max == min)
+//     const float input[dim] = {5.0f, 5.0f, 5.0f};
+
+//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
+//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
+
+//     preprocessor.quantize(input, quantized);
+
+//     // Extract metadata
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+//     const float min_val = metadata[0];
+//     const float delta = metadata[1];
+
+//     // When all values are the same, delta should be 1.0f (fallback)
+//     EXPECT_FLOAT_EQ(min_val, 5.0f);
+//     EXPECT_FLOAT_EQ(delta, 1.0f);
+
+//     // All quantized values should be 0
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_EQ(quantized[i], 0);
+//     }
+// }
+
+// TEST(QuantPreprocessorTest, PreprocessForStorageTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {-1.0f, 0.0f, 1.0f, 2.0f};
+//     size_t input_blob_size = dim * sizeof(float);
+
+//     void* storage_blob = nullptr;
+
+//     // Test preprocessForStorage
+//     preprocessor.preprocessForStorage(input, storage_blob, input_blob_size);
+
+//     // Verify blob was allocated and size was updated
+//     ASSERT_NE(storage_blob, nullptr);
+//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     EXPECT_EQ(input_blob_size, expected_storage_size);
+
+//     // Verify quantization was performed correctly
+//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+
+//     EXPECT_FLOAT_EQ(metadata[0], -1.0f);  // min_val
+//     EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f);  // delta = (2.0f - (-1.0f)) / 255.0f
+
+//     // Clean up
+//     allocator->free_allocation(storage_blob);
+// }
+
+// TEST(QuantPreprocessorTest, PreprocessQueryTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
+//     size_t query_blob_size = dim * sizeof(float);
+//     unsigned char alignment = 32;
+
+//     void* query_blob = nullptr;
+
+//     // Test preprocessQuery (should be no-op, keeping float32)
+//     preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment);
+
+//     // Verify blob was allocated and remains float32
+//     ASSERT_NE(query_blob, nullptr);
+//     EXPECT_EQ(query_blob_size, dim * sizeof(float));  // Size should remain unchanged
+
+//     // Verify data was copied correctly (no quantization)
+//     const float* query_data = static_cast<const float*>(query_blob);
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
+//     }
+
+//     // Verify alignment
+//     EXPECT_EQ(reinterpret_cast<uintptr_t>(query_blob) % alignment, 0);
+
+//     // Clean up
+//     allocator->free_allocation(query_blob);
+// }
+
+// TEST(QuantPreprocessorTest, PreprocessQueryInPlaceTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
+//     size_t input_blob_size = dim * sizeof(float);
+//     unsigned char alignment = 16;
+
+//     // Test preprocessQueryInPlace (should be no-op)
+//     preprocessor.preprocessQueryInPlace(input, input_blob_size, alignment);
+
+//     // Verify data remains unchanged
+//     EXPECT_FLOAT_EQ(input[0], 1.0f);
+//     EXPECT_FLOAT_EQ(input[1], 2.0f);
+//     EXPECT_FLOAT_EQ(input[2], 3.0f);
+//     EXPECT_FLOAT_EQ(input[3], 4.0f);
+// }
+
+// TEST(QuantPreprocessorTest, BackwardCompatibilityPreprocessTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 3;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {0.0f, 5.0f, 10.0f};
+//     size_t input_blob_size = dim * sizeof(float);
+//     unsigned char alignment = 16;
+
+//     void* storage_blob = nullptr;
+//     void* query_blob = nullptr;
+
+//     // Test backward compatibility interface (single blob size)
+//     preprocessor.preprocess(input, storage_blob, query_blob, input_blob_size, alignment);
+
+//     // Verify storage blob was quantized
+//     ASSERT_NE(storage_blob, nullptr);
+//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     EXPECT_EQ(input_blob_size, expected_storage_size);
+
+//     // Verify query blob remains float32
+//     ASSERT_NE(query_blob, nullptr);
+//     const float* query_data = static_cast<const float*>(query_blob);
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
+//     }
+
+//     // Clean up
+//     allocator->free_allocation(storage_blob);
+//     allocator->free_allocation(query_blob);
+// }
+
+// TEST(QuantPreprocessorTest, DualParameterPreprocessTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 3;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {0.0f, 5.0f, 10.0f};
+//     size_t storage_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     size_t query_blob_size = dim * sizeof(float);
+//     unsigned char alignment = 16;
+
+//     void* storage_blob = nullptr;
+//     void* query_blob = nullptr;
+
+//     // Test dual parameter interface (different storage and query sizes)
+//     preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, alignment);
+
+//     // Verify storage blob was quantized with correct size
+//     ASSERT_NE(storage_blob, nullptr);
+//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+
+//     EXPECT_FLOAT_EQ(metadata[0], 0.0f);  // min_val
+//     EXPECT_FLOAT_EQ(metadata[1], 10.0f / 255.0f);  // delta
+
+//     // Verify query blob remains float32 with correct size
+//     ASSERT_NE(query_blob, nullptr);
+//     EXPECT_EQ(query_blob_size, dim * sizeof(float));
+//     const float* query_data = static_cast<const float*>(query_blob);
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
+//     }
+
+//     // Clean up
+//     allocator->free_allocation(storage_blob);
+//     allocator->free_allocation(query_blob);
+// }
+
+// TEST(QuantPreprocessorTest, ExtremeValuesQuantizationTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     // Test with extreme values
+//     const float input[dim] = {-1000.0f, -0.001f, 0.001f, 1000.0f};
+
+//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
+//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
+
+//     preprocessor.quantize(input, quantized);
+
+//     // Extract metadata
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+//     const float min_val = metadata[0];
+//     const float delta = metadata[1];
+
+//     EXPECT_FLOAT_EQ(min_val, -1000.0f);
+//     EXPECT_FLOAT_EQ(delta, 2000.0f / 255.0f);  // delta = (1000.0f - (-1000.0f)) / 255.0f
+
+//     // Verify quantized values
+//     EXPECT_EQ(quantized[0], 0);    // min value maps to 0
+//     EXPECT_EQ(quantized[3], 255);  // max value maps to 255
+// }
+
+// TEST(QuantPreprocessorTest, LargeDimensionTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 128;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     // Create test vector with linearly increasing values
+//     std::vector<float> input(dim);
+//     for (size_t i = 0; i < dim; i++) {
+//         input[i] = static_cast<float>(i);
+//     }
+
+//     size_t input_blob_size = dim * sizeof(float);
+//     void* storage_blob = nullptr;
+
+//     // Test preprocessForStorage with large dimension
+//     preprocessor.preprocessForStorage(input.data(), storage_blob, input_blob_size);
+
+//     ASSERT_NE(storage_blob, nullptr);
+//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     EXPECT_EQ(input_blob_size, expected_storage_size);
+
+//     // Verify quantization
+//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+
+//     EXPECT_FLOAT_EQ(metadata[0], 0.0f);  // min_val
+//     EXPECT_FLOAT_EQ(metadata[1], (dim - 1.0f) / 255.0f);  // delta
+
+//     // Verify first and last quantized values
+//     EXPECT_EQ(quantized[0], 0);
+//     EXPECT_EQ(quantized[dim - 1], 255);
+
+//     // Clean up
+//     allocator->free_allocation(storage_blob);
+// }
+
+// TEST(QuantPreprocessorTest, MultiPreprocessorsContainerIntegrationTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 4;
+//     constexpr size_t n_preprocessors = 1;
+//     unsigned char alignment = 32;
+
+//     auto multiPPContainer = MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+
+//     // Add QuantPreprocessor to the container
+//     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
+//     ASSERT_EQ(multiPPContainer.addPreprocessor(quant_preprocessor), 0);
+
+//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
+//     size_t original_blob_size = dim * sizeof(float);
+
+//     // Test preprocessing through container
+//     ProcessedBlobs processed_blobs = multiPPContainer.preprocess(input, original_blob_size);
+
+//     const void* storage_blob = processed_blobs.getStorageBlob();
+//     const void* query_blob = processed_blobs.getQueryBlob();
+
+//     // Verify storage blob was quantized
+//     ASSERT_NE(storage_blob, nullptr);
+//     ASSERT_NE(storage_blob, input);  // Should be different from original
+
+//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+
+//     EXPECT_FLOAT_EQ(metadata[0], 1.0f);  // min_val
+//     EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f);  // delta
+
+//     // Verify query blob remains float32
+//     ASSERT_NE(query_blob, nullptr);
+//     const float* query_data = static_cast<const float*>(query_blob);
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
+//     }
+
+//     // Verify query blob alignment
+//     EXPECT_EQ(reinterpret_cast<uintptr_t>(query_blob) % alignment, 0);
+// }
+
+// TEST(QuantPreprocessorTest, PreprocessForStorageWithPreAllocatedBlobTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 3;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {2.0f, 4.0f, 6.0f};
+//     size_t input_blob_size = dim * sizeof(float);
+
+//     // Pre-allocate storage blob
+//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     void* storage_blob = allocator->allocate(expected_storage_size);
+
+//     // Test preprocessForStorage with pre-allocated blob
+//     preprocessor.preprocessForStorage(input, storage_blob, input_blob_size);
+
+//     // Verify size was updated correctly
+//     EXPECT_EQ(input_blob_size, expected_storage_size);
+
+//     // Verify quantization was performed
+//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+
+//     EXPECT_FLOAT_EQ(metadata[0], 2.0f);  // min_val
+//     EXPECT_FLOAT_EQ(metadata[1], 4.0f / 255.0f);  // delta = (6.0f - 2.0f) / 255.0f
+
+//     // Verify quantized values
+//     EXPECT_EQ(quantized[0], 0);    // (2.0f - 2.0f) / delta = 0
+//     EXPECT_EQ(quantized[1], 127);  // (4.0f - 2.0f) / delta ≈ 127
+//     EXPECT_EQ(quantized[2], 255);  // (6.0f - 2.0f) / delta = 255
+
+//     // Clean up
+//     allocator->free_allocation(storage_blob);
+// }
+
+// TEST(QuantPreprocessorTest, PreprocessQueryWithPreAllocatedBlobTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 3;
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {1.5f, 2.5f, 3.5f};
+//     size_t query_blob_size = dim * sizeof(float);
+//     unsigned char alignment = 16;
+
+//     // Pre-allocate query blob
+//     void* query_blob = allocator->allocate_aligned(query_blob_size, alignment);
+
+//     // Test preprocessQuery with pre-allocated blob
+//     preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment);
+
+//     // Verify size remains unchanged
+//     EXPECT_EQ(query_blob_size, dim * sizeof(float));
+
+//     // Verify data was copied correctly (no quantization)
+//     const float* query_data = static_cast<const float*>(query_blob);
+//     for (size_t i = 0; i < dim; i++) {
+//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
+//     }
+
+//     // Clean up
+//     allocator->free_allocation(query_blob);
+// }
+
+// TEST(QuantPreprocessorTest, ZeroDimensionEdgeCaseTest) {
+//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+//     constexpr size_t dim = 1;  // Minimum valid dimension
+
+//     QuantPreprocessor preprocessor(allocator, dim);
+
+//     const float input[dim] = {42.0f};
+
+//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
+//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
+//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
+
+//     preprocessor.quantize(input, quantized);
+
+//     // Extract metadata
+//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
+//     const float min_val = metadata[0];
+//     const float delta = metadata[1];
+
+//     // With single value, min == max, so delta should be 1.0f
+//     EXPECT_FLOAT_EQ(min_val, 42.0f);
+//     EXPECT_FLOAT_EQ(delta, 1.0f);
+//     EXPECT_EQ(quantized[0], 0);  // Single value quantizes to 0
+// }
+
diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index 0b7157c0c..f446e46bc 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -4204,6 +4204,17 @@ class PreprocessorDoubleValue : public PreprocessorInterface {
 public:
     PreprocessorDoubleValue(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim) {}
+    
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const override {
+        // This assert makes sure the current use of the preprocessor is valid,
+        // i.e., both blobs are of the same size.
+        // In order to use different sizes, the preprocessor should be modified.
+        assert(storage_blob_size == query_blob_size);
+        preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
+    }
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
 

From adec86bb911d329dbf298147634eb39072ea8eba Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Thu, 29 May 2025 18:08:00 +0300
Subject: [PATCH 15/22] frmat

---
 .../spaces/computer/preprocessor_container.h  |  5 +-
 src/VecSim/spaces/computer/preprocessors.h    | 64 ++++++++-------
 tests/unit/test_components.cpp                | 77 ++++++++-----------
 tests/unit/test_hnsw_tiered.cpp               |  6 +-
 4 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index 41728de66..915c3b83a 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -172,7 +172,7 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o
     void *storage_blob = nullptr;
     void *query_blob = nullptr;
 
-    // Sepreated variables for the storage blob size and query_blob_size, 
+    // Sepreated variables for the storage blob size and query_blob_size,
     // in case we need to change their sizes to different values.
     size_t storage_blob_size = input_blob_size;
     size_t query_blob_size = input_blob_size;
@@ -180,7 +180,8 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size, this->alignment);
+        pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size,
+                       this->alignment);
     }
     // At least one blob was allocated.
 
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 725bba098..f3055899f 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -24,7 +24,7 @@ class PreprocessorInterface : public VecsimBaseObject {
         : VecsimBaseObject(allocator) {}
     // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
     // are the same size.
-    //TODO: Add query_blob_size as a parameter to the preprocess functions, to allow
+    // TODO: Add query_blob_size as a parameter to the preprocess functions, to allow
     // different sizes for storage and query blobs in the future, if needed.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                             size_t &input_blob_size, unsigned char alignment) const = 0;
@@ -50,7 +50,6 @@ class CosinePreprocessor : public PreprocessorInterface {
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
           dim(dim), processed_bytes_count(processed_bytes_count) {}
 
-
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &storage_blob_size, size_t &query_blob_size,
                     unsigned char alignment) const override {
@@ -61,7 +60,8 @@ class CosinePreprocessor : public PreprocessorInterface {
         assert(storage_blob_size == query_blob_size);
 
         preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
-        query_blob_size = storage_blob_size; // Ensure both blobs have the same size after processing.
+        query_blob_size =
+            storage_blob_size; // Ensure both blobs have the same size after processing.
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
@@ -149,13 +149,11 @@ class CosinePreprocessor : public PreprocessorInterface {
     const size_t processed_bytes_count;
 };
 
-
-// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision of uint8_t.
-// The quantization is done by finding the minimum and maximum values of the input vector, and then
-// scaling the values to fit in the range of [0, 255]. The quantized values are then stored in a uint8_t array.
-// [Quantized values, min, delta]
-// Quantized Blob size  =  dim_elements * sizeof(int8)  +  2 * sizeof(float)
-// delta = (max_val - min_val) / 255.0f
+// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision
+// of uint8_t. The quantization is done by finding the minimum and maximum values of the input
+// vector, and then scaling the values to fit in the range of [0, 255]. The quantized values are
+// then stored in a uint8_t array. [Quantized values, min, delta] Quantized Blob size  =
+// dim_elements * sizeof(int8)  +  2 * sizeof(float) delta = (max_val - min_val) / 255.0f
 // quantized_v[i] = (v[i] - min_val) / delta
 // preprocessForStorage:
 // if null:
@@ -164,17 +162,16 @@ class CosinePreprocessor : public PreprocessorInterface {
 // 3. Compute (min, delta) and quantize to the quantized blob or in place.
 // preprocessQuery: No-op – queries arrive as float32 and remain uncompressed
 
-
 class QuantPreprocessor : public PreprocessorInterface {
 public:
     // Constructor for backward compatibility (single blob size)
     QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim),
-          storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {} // quantized + min + delta + inverted_norm {}
-
+          storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {
+    } // quantized + min + delta + inverted_norm {}
 
     // Helper function to perform quantization
-    void quantize(const float* input, uint8_t* quantized) const {
+    void quantize(const float *input, uint8_t *quantized) const {
         assert(dim > 0);
         // Find min and max values
         float min_val = input[0];
@@ -196,7 +193,7 @@ class QuantPreprocessor : public PreprocessorInterface {
 
         // Reserve space for metadata at the end of the blob
         // [quantized values, min_val, delta, inverted_norm]
-        float* metadata = reinterpret_cast<float*>(quantized + this->dim);
+        float *metadata = reinterpret_cast<float *>(quantized + this->dim);
 
         // Store min_val, delta, and inverted_norm in the metadata
         metadata[0] = min_val;
@@ -204,40 +201,39 @@ class QuantPreprocessor : public PreprocessorInterface {
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &input_blob_size, unsigned char alignment) const override {
+                    size_t &input_blob_size, unsigned char alignment) const override {
         // For backward compatibility - both blobs have the same size
-        preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size, alignment);
+        preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size,
+                   alignment);
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &storage_blob_size, size_t &query_blob_size,
-                            unsigned char alignment) const override {
-
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
 
-        if (storage_blob)
-        {
+        if (storage_blob) {
             if (storage_blob == query_blob) {
-                storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+                storage_blob =
+                    this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
             } else if (storage_blob_size < storage_bytes_count) {
-                // Check if the blob allocated by a previous preprocessor is big enough, otherwise, realloc it.
-                // Can happen when the dim is smaller than the quantization metadata.
-                // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8 bytes.
-                // But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) = 10 bytes.
+                // Check if the blob allocated by a previous preprocessor is big enough, otherwise,
+                // realloc it. Can happen when the dim is smaller than the quantization metadata.
+                // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8
+                // bytes. But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) =
+                // 10 bytes.
                 storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count);
             }
 
-        }
-        else {
+        } else {
             // storage_blob is nullptr, so we need to allocate new memory
             storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
         }
         storage_blob_size = this->storage_bytes_count;
 
         // Cast to appropriate types
-        const float* input = static_cast<const float*>(original_blob);
-        uint8_t* quantized = static_cast<uint8_t*>(storage_blob);
+        const float *input = static_cast<const float *>(original_blob);
+        uint8_t *quantized = static_cast<uint8_t *>(storage_blob);
         quantize(input, quantized);
-
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
@@ -248,8 +244,8 @@ class QuantPreprocessor : public PreprocessorInterface {
         }
 
         // Cast to appropriate types
-        const float* input = static_cast<const float*>(original_blob);
-        uint8_t* quantized = static_cast<uint8_t*>(blob);
+        const float *input = static_cast<const float *>(original_blob);
+        uint8_t *quantized = static_cast<uint8_t *>(blob);
         quantize(input, quantized);
 
         input_blob_size = storage_bytes_count;
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 14dd78898..25c83b5d3 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -70,8 +70,8 @@ class DummyStoragePreprocessor : public PreprocessorInterface {
                     size_t &storage_blob_size, size_t &query_blob_size,
                     unsigned char alignment) const override {
         // This assert verifies that there's no use for this function for now - different sizes for
-        // storage and query blobs. If such a use case arises, we can remove the assert and implement
-        // the logic to handle different sizes.
+        // storage and query blobs. If such a use case arises, we can remove the assert and
+        // implement the logic to handle different sizes.
         assert(storage_blob_size == query_blob_size);
 
         preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
@@ -122,11 +122,11 @@ class DummyQueryPreprocessor : public PreprocessorInterface {
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &storage_blob_size, size_t &query_blob_size,
-                            unsigned char alignment) const override {
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
         // This assert verifies that there's no use for this function for now - different sizes for
-        // storage and query blobs. If such a use case arises, we can remove the assert and implement
-        // the logic to handle different sizes.
+        // storage and query blobs. If such a use case arises, we can remove the assert and
+        // implement the logic to handle different sizes.
         assert(storage_blob_size == query_blob_size);
 
         preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
@@ -170,8 +170,8 @@ class DummyMixedPreprocessor : public PreprocessorInterface {
         : PreprocessorInterface(allocator), value_to_add_storage(value_to_add_storage),
           value_to_add_query(value_to_add_query) {}
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &storage_blob_size, size_t &query_blob_size,
-                            unsigned char alignment) const override {
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
         preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
     }
 
@@ -236,8 +236,8 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
     static constexpr unsigned char getExcessValue() { return excess_value; }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &storage_blob_size, size_t &query_blob_size,
-                            unsigned char alignment) const override {
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
         // if the blobs are equal,
         if (storage_blob == query_blob) {
             preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment);
@@ -247,7 +247,7 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
         }
     }
 
-            // If the input blob size is not enough
+    // If the input blob size is not enough
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
         // if the blobs are equal,
@@ -1015,7 +1015,7 @@ TEST(PreprocessorsTest, QuantizationTest) {
     for (size_t i = 0; i < elements; i++) {
         original_blob[i] = static_cast<float>(i);
     }
-    
+
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto multiPPContainer =
         MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
@@ -1030,15 +1030,14 @@ TEST(PreprocessorsTest, QuantizationTest) {
         ASSERT_NE(storage_blob, nullptr);
         ASSERT_NE(storage_blob, query_blob);
 
-        constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float);
+        constexpr size_t quantized_blob_bytes_count =
+            elements * sizeof(uint8_t) + 2 * sizeof(float);
         uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
         quant_preprocessor->quantize(original_blob, expected_processed_blob);
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
-                                                       expected_processed_blob,
-                                                       quantized_blob_bytes_count));
-
+                                                        expected_processed_blob,
+                                                        quantized_blob_bytes_count));
     }
-
 }
 
 // Tests the quantization preprocessor with a cosine preprocessor in the chain.
@@ -1054,7 +1053,7 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) {
     for (size_t i = 0; i < elements; i++) {
         original_blob[i] = static_cast<float>(i);
     }
-    
+
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto cosine_preprocessor =
         new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
@@ -1081,11 +1080,8 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) {
         quant_preprocessor->quantize(expected_processed_blob, quantized_blob);
         // compare the storage blob to the expected processed blob
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
-                                                       quantized_blob,
-                                                       elements));
-
+                                                        quantized_blob, elements));
     }
-
 }
 
 // Tests the quantization preprocessor with a single preprocessor in the chain.
@@ -1102,9 +1098,9 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) {
     auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
     float *original_blob = static_cast<float *>(original_blob_alloc.get());
     for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = static_cast<float>(i+10);
+        original_blob[i] = static_cast<float>(i + 10);
     }
-    
+
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto multiPPContainer =
         MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
@@ -1118,19 +1114,20 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) {
         ASSERT_NE(storage_blob, nullptr);
         ASSERT_NE(storage_blob, query_blob);
 
-        constexpr size_t quantized_blob_bytes_count = elements * sizeof(uint8_t) + 2 * sizeof(float);
+        constexpr size_t quantized_blob_bytes_count =
+            elements * sizeof(uint8_t) + 2 * sizeof(float);
         uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
         quant_preprocessor->quantize(original_blob, expected_processed_blob);
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
-                                                       expected_processed_blob,
-                                                       quantized_blob_bytes_count));
+                                                        expected_processed_blob,
+                                                        quantized_blob_bytes_count));
     }
-
 }
 
 // Tests the quantization preprocessor with a cosine preprocessor in the chain.
-// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to reallocate it.
-// because the original blob size is smaller than the processed_bytes_count of the cosine preprocessor.
+// The QuantPreprocessor receives an allocated blob from the cosine preprocessor, and needs to
+// reallocate it. because the original blob size is smaller than the processed_bytes_count of the
+// cosine preprocessor.
 TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
     // Checks that if not enough memory was allocated by a previous preprocessor, the quantization
     // preprocessor will reallocate it.
@@ -1142,9 +1139,9 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
     auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
     float *original_blob = static_cast<float *>(original_blob_alloc.get());
     for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = static_cast<float>(i+2.5f);
+        original_blob[i] = static_cast<float>(i + 2.5f);
     }
-    
+
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto cosine_preprocessor =
         new (allocator) CosinePreprocessor<float>(allocator, elements, original_blob_size);
@@ -1171,14 +1168,10 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
         quant_preprocessor->quantize(expected_processed_blob, quantized_blob);
         // compare the storage blob to the expected processed blob
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
-                                                       quantized_blob,
-                                                       elements));
-
+                                                        quantized_blob, elements));
     }
-
 }
 
-
 // TEST(PreprocessorsTest, CosineThenQuantizeTest) {
 //     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
 //     constexpr size_t dim = 4;
@@ -1198,10 +1191,7 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
 //     auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size);
 //     const void* quantized_blob = processed_blobs.getStorageBlob();
 
-
 // }
-    
-
 
 // TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) {
 //     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
@@ -1363,7 +1353,8 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
 //     void* query_blob = nullptr;
 
 //     // Test dual parameter interface (different storage and query sizes)
-//     preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size, alignment);
+//     preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size,
+//     alignment);
 
 //     // Verify storage blob was quantized with correct size
 //     ASSERT_NE(storage_blob, nullptr);
@@ -1457,7 +1448,8 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
 //     constexpr size_t n_preprocessors = 1;
 //     unsigned char alignment = 32;
 
-//     auto multiPPContainer = MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+//     auto multiPPContainer = MultiPreprocessorsContainer<float, n_preprocessors>(allocator,
+//     alignment);
 
 //     // Add QuantPreprocessor to the container
 //     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
@@ -1581,4 +1573,3 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
 //     EXPECT_FLOAT_EQ(delta, 1.0f);
 //     EXPECT_EQ(quantized[0], 0);  // Single value quantizes to 0
 // }
-
diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp
index f446e46bc..28a0bc1b2 100644
--- a/tests/unit/test_hnsw_tiered.cpp
+++ b/tests/unit/test_hnsw_tiered.cpp
@@ -4204,10 +4204,10 @@ class PreprocessorDoubleValue : public PreprocessorInterface {
 public:
     PreprocessorDoubleValue(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim) {}
-    
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
-                            size_t &storage_blob_size, size_t &query_blob_size,
-                            unsigned char alignment) const override {
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
         // This assert makes sure the current use of the preprocessor is valid,
         // i.e., both blobs are of the same size.
         // In order to use different sizes, the preprocessor should be modified.

From 31a0c7df660107288f6cceb98688e67b162a98b8 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Tue, 3 Jun 2025 16:54:19 +0300
Subject: [PATCH 16/22] Fix and add tests

---
 src/VecSim/spaces/computer/preprocessors.h | 120 +++--
 tests/unit/test_components.cpp             | 526 +++++----------------
 2 files changed, 205 insertions(+), 441 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index f3055899f..26811f946 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -60,8 +60,8 @@ class CosinePreprocessor : public PreprocessorInterface {
         assert(storage_blob_size == query_blob_size);
 
         preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
-        query_blob_size =
-            storage_blob_size; // Ensure both blobs have the same size after processing.
+        // Ensure both blobs have the same size after processing.
+        query_blob_size = storage_blob_size;
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
@@ -168,18 +168,14 @@ class QuantPreprocessor : public PreprocessorInterface {
     QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim),
           storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {
-    } // quantized + min + delta + inverted_norm {}
+    } // quantized + min + delta{}
 
-    // Helper function to perform quantization
+    // Helper function to perform quantization. This function is used by both preprocess and
+    // supports in-place quantization of the storage blob.
     void quantize(const float *input, uint8_t *quantized) const {
-        assert(dim > 0);
+        assert(input && quantized);
         // Find min and max values
-        float min_val = input[0];
-        float max_val = input[0];
-        for (size_t i = 1; i < this->dim; i++) {
-            min_val = std::min(min_val, input[i]);
-            max_val = std::max(max_val, input[i]);
-        }
+        auto [min_val, max_val] = find_min_max(input);
 
         // Calculate scaling factor
         const float diff = (max_val - min_val);
@@ -191,18 +187,16 @@ class QuantPreprocessor : public PreprocessorInterface {
             quantized[i] = static_cast<uint8_t>(std::round((input[i] - min_val) * inv_delta));
         }
 
-        // Reserve space for metadata at the end of the blob
-        // [quantized values, min_val, delta, inverted_norm]
         float *metadata = reinterpret_cast<float *>(quantized + this->dim);
 
-        // Store min_val, delta, and inverted_norm in the metadata
+        // Store min_val, delta, in the metadata
         metadata[0] = min_val;
         metadata[1] = delta;
     }
 
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
-        // For backward compatibility - both blobs have the same size
+        // For backward compatibility - delegate to the two-size version with identical sizes
         preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size,
                    alignment);
     }
@@ -210,30 +204,56 @@ class QuantPreprocessor : public PreprocessorInterface {
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &storage_blob_size, size_t &query_blob_size,
                     unsigned char alignment) const override {
-
-        if (storage_blob) {
+        // CASE 1: STORAGE BLOB NEEDS ALLOCATION
+        if (!storage_blob) {
+            // Allocate aligned memory for the quantized storage blob
+            storage_blob = static_cast<uint8_t *>(
+                this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
+
+            // Quantize directly from original data
+            const float *input = static_cast<const float *>(original_blob);
+            quantize(input, static_cast<uint8_t *>(storage_blob));
+        }
+        // CASE 2: STORAGE BLOB EXISTS
+        else {
+            // CASE 2A: STORAGE AND QUERY SHARE MEMORY
             if (storage_blob == query_blob) {
-                storage_blob =
+                // Need to allocate a separate storage blob since query remains float32
+                // while storage needs to be quantized
+                void *new_storage =
                     this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
-            } else if (storage_blob_size < storage_bytes_count) {
-                // Check if the blob allocated by a previous preprocessor is big enough, otherwise,
-                // realloc it. Can happen when the dim is smaller than the quantization metadata.
-                // For example, din size is 2 so the storage_blob_size is 2 * sizeof(float) = 8
-                // bytes. But the quantized blob size is 2 * sizeof(uint8_t) + 2 * sizeof(float) =
-                // 10 bytes.
-                storage_blob = this->allocator->reallocate(storage_blob, this->storage_bytes_count);
-            }
 
-        } else {
-            // storage_blob is nullptr, so we need to allocate new memory
-            storage_blob = this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+                // Quantize from the shared blob (query_blob) to the new storage blob
+                quantize(static_cast<const float *>(query_blob),
+                         static_cast<uint8_t *>(new_storage));
+
+                // Update storage_blob to point to the new memory
+                storage_blob = new_storage;
+            }
+            // CASE 2B: SEPARATE STORAGE AND QUERY BLOBS
+            else {
+                // Check if storage blob needs resizing
+                if (storage_blob_size < this->storage_bytes_count) {
+                    // Allocate new storage with correct size
+                    uint8_t *new_storage = static_cast<uint8_t *>(
+                        this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
+
+                    // Quantize from old storage to new storage
+                    quantize(static_cast<const float *>(storage_blob),
+                             static_cast<uint8_t *>(new_storage));
+
+                    // Free old storage and update pointer
+                    this->allocator->free_allocation(storage_blob);
+                    storage_blob = new_storage;
+                } else {
+                    // Storage blob is large enough, quantize in-place
+                    quantize(static_cast<const float *>(storage_blob),
+                             static_cast<uint8_t *>(storage_blob));
+                }
+            }
         }
-        storage_blob_size = this->storage_bytes_count;
 
-        // Cast to appropriate types
-        const float *input = static_cast<const float *>(original_blob);
-        uint8_t *quantized = static_cast<uint8_t *>(storage_blob);
-        quantize(input, quantized);
+        storage_blob_size = this->storage_bytes_count;
     }
 
     void preprocessForStorage(const void *original_blob, void *&blob,
@@ -267,12 +287,42 @@ class QuantPreprocessor : public PreprocessorInterface {
     }
 
     void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
-        // This function is unused for this preprocessor.
         assert(original_blob);
         assert(input_blob_size >= storage_bytes_count);
+
+        // Only quantize in-place if input buffer is large enough
+        if (input_blob_size >= storage_bytes_count) {
+            quantize(static_cast<const float *>(original_blob),
+                     static_cast<uint8_t *>(original_blob));
+        } else {
+            // Fallback: this shouldn't happen if caller allocated correctly
+            assert(false && "Input buffer too small for in-place quantization");
+        }
     }
 
 private:
+    std::pair<float, float> find_min_max(const float *input) const {
+        float min_val = input[0];
+        float max_val = input[0];
+
+        size_t i = 1;
+        // Process 4 elements at a time for better performance
+        for (; i + 3 < dim; i += 4) {
+            const float v0 = input[i];
+            const float v1 = input[i + 1];
+            const float v2 = input[i + 2];
+            const float v3 = input[i + 3];
+            min_val = std::min({min_val, v0, v1, v2, v3});
+            max_val = std::max({max_val, v0, v1, v2, v3});
+        }
+        // Handle remaining elements
+        for (; i < dim; i++) {
+            min_val = std::min(min_val, input[i]);
+            max_val = std::max(max_val, input[i]);
+        }
+        return {min_val, max_val};
+    }
+
     const size_t dim;
     const size_t storage_bytes_count;
 };
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 25c83b5d3..96a925d39 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -243,7 +243,6 @@ class DummyChangeAllocSizePreprocessor : public PreprocessorInterface {
             preprocessGeneral(original_blob, storage_blob, storage_blob_size, alignment);
             query_blob = storage_blob;
             query_blob_size = storage_blob_size;
-            return;
         }
     }
 
@@ -1012,9 +1011,7 @@ TEST(PreprocessorsTest, QuantizationTest) {
     constexpr size_t original_blob_size = elements * sizeof(float);
     auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
     float *original_blob = static_cast<float *>(original_blob_alloc.get());
-    for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = static_cast<float>(i);
-    }
+    test_utils::populate_float_vec(original_blob, elements);
 
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto multiPPContainer =
@@ -1032,11 +1029,33 @@ TEST(PreprocessorsTest, QuantizationTest) {
 
         constexpr size_t quantized_blob_bytes_count =
             elements * sizeof(uint8_t) + 2 * sizeof(float);
-        uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
+        auto expected_processed_blob_alloc = allocator->allocate_unique(quantized_blob_bytes_count);
+        uint8_t *expected_processed_blob =
+            static_cast<uint8_t *>(expected_processed_blob_alloc.get());
+
         quant_preprocessor->quantize(original_blob, expected_processed_blob);
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
                                                         expected_processed_blob,
                                                         quantized_blob_bytes_count));
+
+        // Compare the min and delta values of the quantized blob and expected_processed_blob
+        const float *storage_blob_metadata =
+            reinterpret_cast<const float *>(static_cast<const uint8_t *>(storage_blob) + elements);
+        const float *qexpected_processed_metadata =
+            reinterpret_cast<const float *>(expected_processed_blob + elements);
+        // Check that the min and delta values are close enough
+        ASSERT_NEAR(storage_blob_metadata[0], qexpected_processed_metadata[0], 1e-6);
+        ASSERT_NEAR(storage_blob_metadata[1], qexpected_processed_metadata[1], 1e-6);
+
+        float min = storage_blob_metadata[0];
+        float delta = storage_blob_metadata[1];
+        // reconstruct the original blob from the quantized blob
+        uint8_t *uint8_storage_blob = static_cast<uint8_t *>(const_cast<void *>(storage_blob));
+        for (size_t i = 0; i < elements; i++) {
+            float reconstructed_value = min + uint8_storage_blob[i] * delta;
+            ASSERT_NEAR(reconstructed_value, original_blob[i], 0.01)
+                << "Reconstructed blob differs from the original blob at index " << i;
+        }
     }
 }
 
@@ -1050,9 +1069,7 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) {
     constexpr size_t original_blob_size = elements * sizeof(float);
     auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
     float *original_blob = static_cast<float *>(original_blob_alloc.get());
-    for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = static_cast<float>(i);
-    }
+    test_utils::populate_float_vec(original_blob, elements);
 
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
     auto cosine_preprocessor =
@@ -1072,15 +1089,27 @@ TEST(PreprocessorsTest, QuantizationTestWithCosine) {
         ASSERT_NE(query_blob, nullptr);
         ASSERT_NE(storage_blob, query_blob);
 
-        float expected_processed_blob[elements] = {0};
+        auto expected_processed_blob_alloc = allocator->allocate_unique(original_blob_size);
+        float *expected_processed_blob = static_cast<float *>(expected_processed_blob_alloc.get());
         memcpy(expected_processed_blob, original_blob, original_blob_size);
         VecSim_Normalize(expected_processed_blob, elements, VecSimType_FLOAT32);
-        uint8_t quantized_blob[elements * sizeof(uint8_t) + 2 * sizeof(float)] = {0};
+
+        auto quantized_blob_alloc =
+            allocator->allocate_unique(elements * sizeof(uint8_t) + 2 * sizeof(float));
+        uint8_t *quantized_blob = static_cast<uint8_t *>(quantized_blob_alloc.get());
+
         // quantization should be applied after normalization
         quant_preprocessor->quantize(expected_processed_blob, quantized_blob);
         // compare the storage blob to the expected processed blob
         EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
                                                         quantized_blob, elements));
+        const float *storage_blob_metadata =
+            reinterpret_cast<const float *>(static_cast<const uint8_t *>(storage_blob) + elements);
+        const float *qexpected_processed_metadata =
+            reinterpret_cast<const float *>(quantized_blob + elements);
+        // Check that the min and delta values are close enough
+        ASSERT_FLOAT_EQ(storage_blob_metadata[0], qexpected_processed_metadata[0]);
+        ASSERT_FLOAT_EQ(storage_blob_metadata[1], qexpected_processed_metadata[1]);
     }
 }
 
@@ -1091,19 +1120,20 @@ TEST(PreprocessorsTest, ReallocateVectorQuantizationTest) {
     // Checks that if not enough memory was allocated by a previous preprocessor, the quantization
     // preprocessor will reallocate it.
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-    constexpr size_t n_preprocessors = 1;
+    constexpr size_t n_preprocessors = 2;
     constexpr size_t alignment = 5;
     constexpr size_t elements = 2;
     constexpr size_t original_blob_size = elements * sizeof(float);
     auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
     float *original_blob = static_cast<float *>(original_blob_alloc.get());
-    for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = static_cast<float>(i + 10);
-    }
+    test_utils::populate_float_vec(original_blob, elements);
 
     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto dummy_preprocessor =
+        new (allocator) dummyPreprocessors::DummyStoragePreprocessor<float>(allocator, 0.0f);
     auto multiPPContainer =
         MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(dummy_preprocessor);
     multiPPContainer.addPreprocessor(quant_preprocessor);
     {
         ProcessedBlobs processed_blobs =
@@ -1172,404 +1202,88 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
     }
 }
 
-// TEST(PreprocessorsTest, CosineThenQuantizeTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-
-//     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
-//     auto cosine_preprocessor =
-//         new (allocator) CosinePreprocessor<float>(allocator, dim, dim * sizeof(float));
-
-//     auto multiPPContainer =
-//         MultiPreprocessorsContainer<float, 2>(allocator, 0);
-//     multiPPContainer.addPreprocessor(cosine_preprocessor);
-//     multiPPContainer.addPreprocessor(quant_preprocessor);
-//     // Test input vector with known values
-//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
-//     const size_t input_blob_size = dim * sizeof(float);
-//     // Allocate output buffer
-//     auto processed_blobs = multiPPContainer.preprocess(input, input_blob_size);
-//     const void* quantized_blob = processed_blobs.getStorageBlob();
-
-// }
-
-// TEST(QuantPreprocessorTest, UniformValuesQuantizationTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 3;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     // Test with uniform values (edge case where max == min)
-//     const float input[dim] = {5.0f, 5.0f, 5.0f};
-
-//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
-//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
-
-//     preprocessor.quantize(input, quantized);
-
-//     // Extract metadata
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-//     const float min_val = metadata[0];
-//     const float delta = metadata[1];
-
-//     // When all values are the same, delta should be 1.0f (fallback)
-//     EXPECT_FLOAT_EQ(min_val, 5.0f);
-//     EXPECT_FLOAT_EQ(delta, 1.0f);
-
-//     // All quantized values should be 0
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_EQ(quantized[i], 0);
-//     }
-// }
-
-// TEST(QuantPreprocessorTest, PreprocessForStorageTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {-1.0f, 0.0f, 1.0f, 2.0f};
-//     size_t input_blob_size = dim * sizeof(float);
-
-//     void* storage_blob = nullptr;
-
-//     // Test preprocessForStorage
-//     preprocessor.preprocessForStorage(input, storage_blob, input_blob_size);
-
-//     // Verify blob was allocated and size was updated
-//     ASSERT_NE(storage_blob, nullptr);
-//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     EXPECT_EQ(input_blob_size, expected_storage_size);
-
-//     // Verify quantization was performed correctly
-//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-
-//     EXPECT_FLOAT_EQ(metadata[0], -1.0f);  // min_val
-//     EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f);  // delta = (2.0f - (-1.0f)) / 255.0f
-
-//     // Clean up
-//     allocator->free_allocation(storage_blob);
-// }
-
-// TEST(QuantPreprocessorTest, PreprocessQueryTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
-//     size_t query_blob_size = dim * sizeof(float);
-//     unsigned char alignment = 32;
-
-//     void* query_blob = nullptr;
-
-//     // Test preprocessQuery (should be no-op, keeping float32)
-//     preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment);
-
-//     // Verify blob was allocated and remains float32
-//     ASSERT_NE(query_blob, nullptr);
-//     EXPECT_EQ(query_blob_size, dim * sizeof(float));  // Size should remain unchanged
-
-//     // Verify data was copied correctly (no quantization)
-//     const float* query_data = static_cast<const float*>(query_blob);
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
-//     }
-
-//     // Verify alignment
-//     EXPECT_EQ(reinterpret_cast<uintptr_t>(query_blob) % alignment, 0);
-
-//     // Clean up
-//     allocator->free_allocation(query_blob);
-// }
-
-// TEST(QuantPreprocessorTest, PreprocessQueryInPlaceTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
-//     size_t input_blob_size = dim * sizeof(float);
-//     unsigned char alignment = 16;
-
-//     // Test preprocessQueryInPlace (should be no-op)
-//     preprocessor.preprocessQueryInPlace(input, input_blob_size, alignment);
-
-//     // Verify data remains unchanged
-//     EXPECT_FLOAT_EQ(input[0], 1.0f);
-//     EXPECT_FLOAT_EQ(input[1], 2.0f);
-//     EXPECT_FLOAT_EQ(input[2], 3.0f);
-//     EXPECT_FLOAT_EQ(input[3], 4.0f);
-// }
-
-// TEST(QuantPreprocessorTest, BackwardCompatibilityPreprocessTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 3;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {0.0f, 5.0f, 10.0f};
-//     size_t input_blob_size = dim * sizeof(float);
-//     unsigned char alignment = 16;
-
-//     void* storage_blob = nullptr;
-//     void* query_blob = nullptr;
-
-//     // Test backward compatibility interface (single blob size)
-//     preprocessor.preprocess(input, storage_blob, query_blob, input_blob_size, alignment);
-
-//     // Verify storage blob was quantized
-//     ASSERT_NE(storage_blob, nullptr);
-//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     EXPECT_EQ(input_blob_size, expected_storage_size);
-
-//     // Verify query blob remains float32
-//     ASSERT_NE(query_blob, nullptr);
-//     const float* query_data = static_cast<const float*>(query_blob);
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
-//     }
-
-//     // Clean up
-//     allocator->free_allocation(storage_blob);
-//     allocator->free_allocation(query_blob);
-// }
-
-// TEST(QuantPreprocessorTest, DualParameterPreprocessTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 3;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {0.0f, 5.0f, 10.0f};
-//     size_t storage_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     size_t query_blob_size = dim * sizeof(float);
-//     unsigned char alignment = 16;
-
-//     void* storage_blob = nullptr;
-//     void* query_blob = nullptr;
-
-//     // Test dual parameter interface (different storage and query sizes)
-//     preprocessor.preprocess(input, storage_blob, query_blob, storage_blob_size, query_blob_size,
-//     alignment);
-
-//     // Verify storage blob was quantized with correct size
-//     ASSERT_NE(storage_blob, nullptr);
-//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-
-//     EXPECT_FLOAT_EQ(metadata[0], 0.0f);  // min_val
-//     EXPECT_FLOAT_EQ(metadata[1], 10.0f / 255.0f);  // delta
-
-//     // Verify query blob remains float32 with correct size
-//     ASSERT_NE(query_blob, nullptr);
-//     EXPECT_EQ(query_blob_size, dim * sizeof(float));
-//     const float* query_data = static_cast<const float*>(query_blob);
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
-//     }
-
-//     // Clean up
-//     allocator->free_allocation(storage_blob);
-//     allocator->free_allocation(query_blob);
-// }
-
-// TEST(QuantPreprocessorTest, ExtremeValuesQuantizationTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     // Test with extreme values
-//     const float input[dim] = {-1000.0f, -0.001f, 0.001f, 1000.0f};
-
-//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
-//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
-
-//     preprocessor.quantize(input, quantized);
-
-//     // Extract metadata
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-//     const float min_val = metadata[0];
-//     const float delta = metadata[1];
-
-//     EXPECT_FLOAT_EQ(min_val, -1000.0f);
-//     EXPECT_FLOAT_EQ(delta, 2000.0f / 255.0f);  // delta = (1000.0f - (-1000.0f)) / 255.0f
-
-//     // Verify quantized values
-//     EXPECT_EQ(quantized[0], 0);    // min value maps to 0
-//     EXPECT_EQ(quantized[3], 255);  // max value maps to 255
-// }
-
-// TEST(QuantPreprocessorTest, LargeDimensionTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 128;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     // Create test vector with linearly increasing values
-//     std::vector<float> input(dim);
-//     for (size_t i = 0; i < dim; i++) {
-//         input[i] = static_cast<float>(i);
-//     }
-
-//     size_t input_blob_size = dim * sizeof(float);
-//     void* storage_blob = nullptr;
-
-//     // Test preprocessForStorage with large dimension
-//     preprocessor.preprocessForStorage(input.data(), storage_blob, input_blob_size);
-
-//     ASSERT_NE(storage_blob, nullptr);
-//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     EXPECT_EQ(input_blob_size, expected_storage_size);
-
-//     // Verify quantization
-//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-
-//     EXPECT_FLOAT_EQ(metadata[0], 0.0f);  // min_val
-//     EXPECT_FLOAT_EQ(metadata[1], (dim - 1.0f) / 255.0f);  // delta
-
-//     // Verify first and last quantized values
-//     EXPECT_EQ(quantized[0], 0);
-//     EXPECT_EQ(quantized[dim - 1], 255);
-
-//     // Clean up
-//     allocator->free_allocation(storage_blob);
-// }
-
-// TEST(QuantPreprocessorTest, MultiPreprocessorsContainerIntegrationTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 4;
-//     constexpr size_t n_preprocessors = 1;
-//     unsigned char alignment = 32;
-
-//     auto multiPPContainer = MultiPreprocessorsContainer<float, n_preprocessors>(allocator,
-//     alignment);
-
-//     // Add QuantPreprocessor to the container
-//     auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
-//     ASSERT_EQ(multiPPContainer.addPreprocessor(quant_preprocessor), 0);
-
-//     const float input[dim] = {1.0f, 2.0f, 3.0f, 4.0f};
-//     size_t original_blob_size = dim * sizeof(float);
-
-//     // Test preprocessing through container
-//     ProcessedBlobs processed_blobs = multiPPContainer.preprocess(input, original_blob_size);
-
-//     const void* storage_blob = processed_blobs.getStorageBlob();
-//     const void* query_blob = processed_blobs.getQueryBlob();
-
-//     // Verify storage blob was quantized
-//     ASSERT_NE(storage_blob, nullptr);
-//     ASSERT_NE(storage_blob, input);  // Should be different from original
-
-//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-
-//     EXPECT_FLOAT_EQ(metadata[0], 1.0f);  // min_val
-//     EXPECT_FLOAT_EQ(metadata[1], 3.0f / 255.0f);  // delta
-
-//     // Verify query blob remains float32
-//     ASSERT_NE(query_blob, nullptr);
-//     const float* query_data = static_cast<const float*>(query_blob);
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
-//     }
-
-//     // Verify query blob alignment
-//     EXPECT_EQ(reinterpret_cast<uintptr_t>(query_blob) % alignment, 0);
-// }
-
-// TEST(QuantPreprocessorTest, PreprocessForStorageWithPreAllocatedBlobTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 3;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {2.0f, 4.0f, 6.0f};
-//     size_t input_blob_size = dim * sizeof(float);
-
-//     // Pre-allocate storage blob
-//     const size_t expected_storage_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     void* storage_blob = allocator->allocate(expected_storage_size);
-
-//     // Test preprocessForStorage with pre-allocated blob
-//     preprocessor.preprocessForStorage(input, storage_blob, input_blob_size);
-
-//     // Verify size was updated correctly
-//     EXPECT_EQ(input_blob_size, expected_storage_size);
-
-//     // Verify quantization was performed
-//     const uint8_t* quantized = static_cast<const uint8_t*>(storage_blob);
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-
-//     EXPECT_FLOAT_EQ(metadata[0], 2.0f);  // min_val
-//     EXPECT_FLOAT_EQ(metadata[1], 4.0f / 255.0f);  // delta = (6.0f - 2.0f) / 255.0f
-
-//     // Verify quantized values
-//     EXPECT_EQ(quantized[0], 0);    // (2.0f - 2.0f) / delta = 0
-//     EXPECT_EQ(quantized[1], 127);  // (4.0f - 2.0f) / delta ≈ 127
-//     EXPECT_EQ(quantized[2], 255);  // (6.0f - 2.0f) / delta = 255
-
-//     // Clean up
-//     allocator->free_allocation(storage_blob);
-// }
-
-// TEST(QuantPreprocessorTest, PreprocessQueryWithPreAllocatedBlobTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 3;
-
-//     QuantPreprocessor preprocessor(allocator, dim);
-
-//     const float input[dim] = {1.5f, 2.5f, 3.5f};
-//     size_t query_blob_size = dim * sizeof(float);
-//     unsigned char alignment = 16;
-
-//     // Pre-allocate query blob
-//     void* query_blob = allocator->allocate_aligned(query_blob_size, alignment);
-
-//     // Test preprocessQuery with pre-allocated blob
-//     preprocessor.preprocessQuery(input, query_blob, query_blob_size, alignment);
-
-//     // Verify size remains unchanged
-//     EXPECT_EQ(query_blob_size, dim * sizeof(float));
+// Test with all identical values (delta = 0 case)
+TEST(PreprocessorsTest, QuantizationIdenticalValuesTest) {
+    // Test with all same values
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t n_preprocessors = 1;
+    constexpr size_t alignment = 5;
+    constexpr size_t elements = 5;
+    constexpr size_t original_blob_size = elements * sizeof(float);
+    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
+    float *original_blob = static_cast<float *>(original_blob_alloc.get());
+    for (size_t i = 0; i < elements; i++) {
+        original_blob[i] = 1.0f; // All values are the same
+    }
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_blob, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
 
-//     // Verify data was copied correctly (no quantization)
-//     const float* query_data = static_cast<const float*>(query_blob);
-//     for (size_t i = 0; i < dim; i++) {
-//         EXPECT_FLOAT_EQ(query_data[i], input[i]);
-//     }
+        constexpr size_t quantized_blob_bytes_count =
+            elements * sizeof(uint8_t) + 2 * sizeof(float);
+        uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
+        quant_preprocessor->quantize(original_blob, expected_processed_blob);
+        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
+                                                        expected_processed_blob,
+                                                        quantized_blob_bytes_count));
+    }
+}
 
-//     // Clean up
-//     allocator->free_allocation(query_blob);
-// }
+TEST(PreprocessorsTest, QuantizationInPlaceTest) {
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    constexpr size_t alignment = 5;
+    constexpr size_t dim = 5;
+    constexpr size_t n_preprocessors = 2;
+    constexpr size_t original_blob_size = dim * sizeof(float);
+    constexpr size_t storage_bytes_count = dim * sizeof(uint8_t) + 2 * sizeof(float);
 
-// TEST(QuantPreprocessorTest, ZeroDimensionEdgeCaseTest) {
-//     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-//     constexpr size_t dim = 1;  // Minimum valid dimension
+    // Create a float array with known values
+    float original_data[dim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
 
-//     QuantPreprocessor preprocessor(allocator, dim);
+    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, dim);
+    auto dummy_preprocessor =
+        new (allocator) dummyPreprocessors::DummyStoragePreprocessor<float>(allocator, 0.0f);
+    auto multiPPContainer =
+        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
+    multiPPContainer.addPreprocessor(dummy_preprocessor);
+    multiPPContainer.addPreprocessor(quant_preprocessor);
+    {
+        ProcessedBlobs processed_blobs =
+            multiPPContainer.preprocess(original_data, original_blob_size);
+        const void *storage_blob = processed_blobs.getStorageBlob();
+        const void *query_blob = processed_blobs.getQueryBlob();
+        // blobs should point to the same memory slot
+        ASSERT_NE(storage_blob, nullptr);
+        ASSERT_NE(storage_blob, query_blob);
 
-//     const float input[dim] = {42.0f};
+        // Verify the quantization results
+        const uint8_t *quantized = static_cast<const uint8_t *>(storage_blob);
+        const float *metadata = reinterpret_cast<const float *>(quantized + dim);
 
-//     const size_t expected_blob_size = dim * sizeof(uint8_t) + 2 * sizeof(float);
-//     auto quantized_blob = allocator->allocate_unique(expected_blob_size);
-//     uint8_t* quantized = static_cast<uint8_t*>(quantized_blob.get());
+        // Extract metadata
+        float min_val = metadata[0];
+        float delta = metadata[1];
 
-//     preprocessor.quantize(input, quantized);
+        // Check min_val is correct (should be 1.0f)
+        ASSERT_FLOAT_EQ(min_val, 1.0f);
 
-//     // Extract metadata
-//     const float* metadata = reinterpret_cast<const float*>(quantized + dim);
-//     const float min_val = metadata[0];
-//     const float delta = metadata[1];
+        // Check delta is correct ((5.0f - 1.0f) / 255.0f)
+        ASSERT_FLOAT_EQ(delta, 4.0f / 255.0f);
 
-//     // With single value, min == max, so delta should be 1.0f
-//     EXPECT_FLOAT_EQ(min_val, 42.0f);
-//     EXPECT_FLOAT_EQ(delta, 1.0f);
-//     EXPECT_EQ(quantized[0], 0);  // Single value quantizes to 0
-// }
+        // Check quantized values
+        for (size_t i = 0; i < dim; i++) {
+            uint8_t expected =
+                static_cast<uint8_t>(std::round((original_data[i] - min_val) / delta));
+            ASSERT_EQ(quantized[i], expected);
+        }
+    }
+}
\ No newline at end of file

From 59fb16df35dbb588f8f0c12e9b851ff40546b572 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Wed, 4 Jun 2025 10:16:16 +0300
Subject: [PATCH 17/22] added tests for coverege

---
 .../spaces/computer/preprocessor_container.h  |   4 +-
 src/VecSim/spaces/computer/preprocessors.h    |  14 +-
 tests/unit/test_components.cpp                | 123 ++++++++++++------
 3 files changed, 91 insertions(+), 50 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
index 915c3b83a..e9ed08f8a 100644
--- a/src/VecSim/spaces/computer/preprocessor_container.h
+++ b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -172,8 +172,8 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o
     void *storage_blob = nullptr;
     void *query_blob = nullptr;
 
-    // Sepreated variables for the storage blob size and query_blob_size,
-    // in case we need to change their sizes to different values.
+    // Use of separate variables for the storage_blob_size and query_blob_size, in case we need to
+    // change their sizes to different values.
     size_t storage_blob_size = input_blob_size;
     size_t query_blob_size = input_blob_size;
 
diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 26811f946..cf0f265f3 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -168,10 +168,10 @@ class QuantPreprocessor : public PreprocessorInterface {
     QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim),
           storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {
-    } // quantized + min + delta{}
+    } // quantized + min + delta
 
     // Helper function to perform quantization. This function is used by both preprocess and
-    // supports in-place quantization of the storage blob.
+    // preprocessQuery and supports in-place quantization of the storage blob.
     void quantize(const float *input, uint8_t *quantized) const {
         assert(input && quantized);
         // Find min and max values
@@ -274,10 +274,6 @@ class QuantPreprocessor : public PreprocessorInterface {
     void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
                          unsigned char alignment) const override {
         // No-op: queries remain as float32
-        if (blob == nullptr) {
-            blob = this->allocator->allocate_aligned(query_blob_size, alignment);
-            memcpy(blob, original_blob, query_blob_size);
-        }
     }
 
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
@@ -288,15 +284,13 @@ class QuantPreprocessor : public PreprocessorInterface {
 
     void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
         assert(original_blob);
-        assert(input_blob_size >= storage_bytes_count);
+        assert(input_blob_size >= storage_bytes_count &&
+               "Input buffer too small for in-place quantization");
 
         // Only quantize in-place if input buffer is large enough
         if (input_blob_size >= storage_bytes_count) {
             quantize(static_cast<const float *>(original_blob),
                      static_cast<uint8_t *>(original_blob));
-        } else {
-            // Fallback: this shouldn't happen if caller allocated correctly
-            assert(false && "Input buffer too small for in-place quantization");
         }
     }
 
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 96a925d39..6c350104d 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -1044,8 +1044,8 @@ TEST(PreprocessorsTest, QuantizationTest) {
         const float *qexpected_processed_metadata =
             reinterpret_cast<const float *>(expected_processed_blob + elements);
         // Check that the min and delta values are close enough
-        ASSERT_NEAR(storage_blob_metadata[0], qexpected_processed_metadata[0], 1e-6);
-        ASSERT_NEAR(storage_blob_metadata[1], qexpected_processed_metadata[1], 1e-6);
+        ASSERT_FLOAT_EQ(storage_blob_metadata[0], qexpected_processed_metadata[0]);
+        ASSERT_FLOAT_EQ(storage_blob_metadata[1], qexpected_processed_metadata[1]);
 
         float min = storage_blob_metadata[0];
         float delta = storage_blob_metadata[1];
@@ -1202,41 +1202,6 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
     }
 }
 
-// Test with all identical values (delta = 0 case)
-TEST(PreprocessorsTest, QuantizationIdenticalValuesTest) {
-    // Test with all same values
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-    constexpr size_t n_preprocessors = 1;
-    constexpr size_t alignment = 5;
-    constexpr size_t elements = 5;
-    constexpr size_t original_blob_size = elements * sizeof(float);
-    auto original_blob_alloc = allocator->allocate_unique(original_blob_size);
-    float *original_blob = static_cast<float *>(original_blob_alloc.get());
-    for (size_t i = 0; i < elements; i++) {
-        original_blob[i] = 1.0f; // All values are the same
-    }
-    auto quant_preprocessor = new (allocator) QuantPreprocessor(allocator, elements);
-    auto multiPPContainer =
-        MultiPreprocessorsContainer<float, n_preprocessors>(allocator, alignment);
-    multiPPContainer.addPreprocessor(quant_preprocessor);
-    {
-        ProcessedBlobs processed_blobs =
-            multiPPContainer.preprocess(original_blob, original_blob_size);
-        const void *storage_blob = processed_blobs.getStorageBlob();
-        const void *query_blob = processed_blobs.getQueryBlob();
-        // blobs should point to the same memory slot
-        ASSERT_NE(storage_blob, nullptr);
-        ASSERT_NE(storage_blob, query_blob);
-
-        constexpr size_t quantized_blob_bytes_count =
-            elements * sizeof(uint8_t) + 2 * sizeof(float);
-        uint8_t expected_processed_blob[quantized_blob_bytes_count] = {0};
-        quant_preprocessor->quantize(original_blob, expected_processed_blob);
-        EXPECT_NO_FATAL_FAILURE(CompareVectors<uint8_t>(static_cast<const uint8_t *>(storage_blob),
-                                                        expected_processed_blob,
-                                                        quantized_blob_bytes_count));
-    }
-}
 
 TEST(PreprocessorsTest, QuantizationInPlaceTest) {
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
@@ -1286,4 +1251,86 @@ TEST(PreprocessorsTest, QuantizationInPlaceTest) {
             ASSERT_EQ(quantized[i], expected);
         }
     }
-}
\ No newline at end of file
+}
+
+// Test the backward compatibility of the preprocess method with a single input_blob_size parameter
+TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) {
+    using namespace dummyPreprocessors;
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+
+    constexpr size_t dim = 4;
+    unsigned char alignment = 5;
+    float initial_value = 1.0f;
+    const float original_blob[dim] = {initial_value, initial_value, initial_value, initial_value};
+    size_t original_blob_size = sizeof(original_blob);
+
+    // Create a preprocessor that modifies both storage and query blobs
+    auto mixed_preprocessor = new (allocator)
+        DummyMixedPreprocessor<float>(allocator, 7.0f, 2.0f);
+
+    // Test the backward compatibility method (single input_blob_size)
+    void *storage_blob = nullptr;
+    void *query_blob = nullptr;
+    size_t input_blob_size = original_blob_size;
+
+    // Call the backward compatibility version of preprocess
+    mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size, alignment);
+
+    // Verify that both blobs were allocated and processed correctly
+    ASSERT_NE(storage_blob, nullptr);
+    ASSERT_NE(query_blob, nullptr);
+    ASSERT_NE(storage_blob, query_blob);
+    
+    // Verify that the input_blob_size was updated correctly
+    ASSERT_EQ(input_blob_size, original_blob_size);
+    
+    // Verify that the storage blob was processed correctly
+    ASSERT_EQ(((const float *)storage_blob)[0], initial_value + 7.0f);
+    
+    // Verify that the query blob was processed correctly
+    ASSERT_EQ(((const float *)query_blob)[0], initial_value + 2.0f);
+    
+    // Verify that the query blob is aligned
+    unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
+    ASSERT_EQ(address_alignment, 0);
+}
+
+// Test the QuantPreprocessor's in-place quantization error handling when buffer is too small
+TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) {
+    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
+    
+    constexpr size_t dim = 4;
+    constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float);
+    constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small
+    
+    // Create a QuantPreprocessor
+    auto quant_preprocessor = QuantPreprocessor(allocator, dim);
+    
+    // Allocate a buffer that's too small for in-place quantization
+    auto buffer_alloc = allocator->allocate_unique(too_small_size);
+    float *buffer = static_cast<float *>(buffer_alloc.get());
+    
+    // Initialize buffer with some values
+    for (size_t i = 0; i < too_small_size / sizeof(float); i++) {
+        buffer[i] = static_cast<float>(i);
+    }
+    
+    // In debug builds, this should trigger an assertion failure
+    EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size),
+                 "Input buffer too small for in-place quantization");
+    
+    // Test with correct size buffer to ensure normal operation works
+    constexpr size_t correct_size = required_storage_bytes;
+    auto correct_buffer_alloc = allocator->allocate_unique(correct_size);
+    float *correct_buffer = static_cast<float *>(correct_buffer_alloc.get());
+    
+    // Initialize buffer with some values
+    for (size_t i = 0; i < dim; i++) {
+        correct_buffer[i] = static_cast<float>(i);
+    }
+    
+    // This should not assert
+    EXPECT_NO_FATAL_FAILURE(
+        quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size));
+    
+}

From 866d8cbaf3522610661e921e8bf345eecec74a9e Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Wed, 4 Jun 2025 10:27:03 +0300
Subject: [PATCH 18/22] format

---
 tests/unit/test_components.cpp | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 6c350104d..643b02e57 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -1202,7 +1202,6 @@ TEST(PreprocessorsTest, ReallocateVectorCosineQuantizationTest) {
     }
 }
 
-
 TEST(PreprocessorsTest, QuantizationInPlaceTest) {
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
     constexpr size_t alignment = 5;
@@ -1265,8 +1264,7 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) {
     size_t original_blob_size = sizeof(original_blob);
 
     // Create a preprocessor that modifies both storage and query blobs
-    auto mixed_preprocessor = new (allocator)
-        DummyMixedPreprocessor<float>(allocator, 7.0f, 2.0f);
+    auto mixed_preprocessor = new (allocator) DummyMixedPreprocessor<float>(allocator, 7.0f, 2.0f);
 
     // Test the backward compatibility method (single input_blob_size)
     void *storage_blob = nullptr;
@@ -1274,22 +1272,23 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) {
     size_t input_blob_size = original_blob_size;
 
     // Call the backward compatibility version of preprocess
-    mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size, alignment);
+    mixed_preprocessor->preprocess(original_blob, storage_blob, query_blob, input_blob_size,
+                                   alignment);
 
     // Verify that both blobs were allocated and processed correctly
     ASSERT_NE(storage_blob, nullptr);
     ASSERT_NE(query_blob, nullptr);
     ASSERT_NE(storage_blob, query_blob);
-    
+
     // Verify that the input_blob_size was updated correctly
     ASSERT_EQ(input_blob_size, original_blob_size);
-    
+
     // Verify that the storage blob was processed correctly
     ASSERT_EQ(((const float *)storage_blob)[0], initial_value + 7.0f);
-    
+
     // Verify that the query blob was processed correctly
     ASSERT_EQ(((const float *)query_blob)[0], initial_value + 2.0f);
-    
+
     // Verify that the query blob is aligned
     unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
     ASSERT_EQ(address_alignment, 0);
@@ -1298,39 +1297,38 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) {
 // Test the QuantPreprocessor's in-place quantization error handling when buffer is too small
 TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) {
     std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-    
+
     constexpr size_t dim = 4;
     constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float);
     constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small
-    
+
     // Create a QuantPreprocessor
     auto quant_preprocessor = QuantPreprocessor(allocator, dim);
-    
+
     // Allocate a buffer that's too small for in-place quantization
     auto buffer_alloc = allocator->allocate_unique(too_small_size);
     float *buffer = static_cast<float *>(buffer_alloc.get());
-    
+
     // Initialize buffer with some values
     for (size_t i = 0; i < too_small_size / sizeof(float); i++) {
         buffer[i] = static_cast<float>(i);
     }
-    
+
     // In debug builds, this should trigger an assertion failure
     EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size),
                  "Input buffer too small for in-place quantization");
-    
+
     // Test with correct size buffer to ensure normal operation works
     constexpr size_t correct_size = required_storage_bytes;
     auto correct_buffer_alloc = allocator->allocate_unique(correct_size);
     float *correct_buffer = static_cast<float *>(correct_buffer_alloc.get());
-    
+
     // Initialize buffer with some values
     for (size_t i = 0; i < dim; i++) {
         correct_buffer[i] = static_cast<float>(i);
     }
-    
+
     // This should not assert
     EXPECT_NO_FATAL_FAILURE(
         quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size));
-    
 }

From ec4a3a7d3d42e586ab8bbed0df1d887d78592b13 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Wed, 4 Jun 2025 10:48:10 +0300
Subject: [PATCH 19/22] Remove the tests

---
 tests/unit/test_components.cpp | 39 ----------------------------------
 1 file changed, 39 deletions(-)

diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 643b02e57..5afe2873d 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -1293,42 +1293,3 @@ TEST(PreprocessorsTest, PreprocessBackwardCompatibilityTest) {
     unsigned char address_alignment = (uintptr_t)(query_blob) % alignment;
     ASSERT_EQ(address_alignment, 0);
 }
-
-// Test the QuantPreprocessor's in-place quantization error handling when buffer is too small
-TEST(PreprocessorsTest, QuantPreprocessorInPlaceTooSmallBuffer) {
-    std::shared_ptr<VecSimAllocator> allocator = VecSimAllocator::newVecsimAllocator();
-
-    constexpr size_t dim = 4;
-    constexpr size_t required_storage_bytes = dim * sizeof(uint8_t) + 2 * sizeof(float);
-    constexpr size_t too_small_size = required_storage_bytes - 1; // One byte too small
-
-    // Create a QuantPreprocessor
-    auto quant_preprocessor = QuantPreprocessor(allocator, dim);
-
-    // Allocate a buffer that's too small for in-place quantization
-    auto buffer_alloc = allocator->allocate_unique(too_small_size);
-    float *buffer = static_cast<float *>(buffer_alloc.get());
-
-    // Initialize buffer with some values
-    for (size_t i = 0; i < too_small_size / sizeof(float); i++) {
-        buffer[i] = static_cast<float>(i);
-    }
-
-    // In debug builds, this should trigger an assertion failure
-    EXPECT_DEATH(quant_preprocessor.preprocessStorageInPlace(buffer, too_small_size),
-                 "Input buffer too small for in-place quantization");
-
-    // Test with correct size buffer to ensure normal operation works
-    constexpr size_t correct_size = required_storage_bytes;
-    auto correct_buffer_alloc = allocator->allocate_unique(correct_size);
-    float *correct_buffer = static_cast<float *>(correct_buffer_alloc.get());
-
-    // Initialize buffer with some values
-    for (size_t i = 0; i < dim; i++) {
-        correct_buffer[i] = static_cast<float>(i);
-    }
-
-    // This should not assert
-    EXPECT_NO_FATAL_FAILURE(
-        quant_preprocessor.preprocessStorageInPlace(correct_buffer, correct_size));
-}

From 985c2c8e3a52c5c1a89284cb5920cb98343a8b96 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Wed, 4 Jun 2025 16:26:45 +0300
Subject: [PATCH 20/22] Fix test

---
 src/VecSim/spaces/computer/preprocessors.h | 33 ++++++++--------------
 tests/unit/test_components.cpp             |  9 +++---
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index cf0f265f3..6c5366661 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -24,8 +24,7 @@ class PreprocessorInterface : public VecsimBaseObject {
         : VecsimBaseObject(allocator) {}
     // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
     // are the same size.
-    // TODO: Add query_blob_size as a parameter to the preprocess functions, to allow
-    // different sizes for storage and query blobs in the future, if needed.
+    // Use the the overload below for different sizes.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                             size_t &input_blob_size, unsigned char alignment) const = 0;
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
@@ -149,19 +148,15 @@ class CosinePreprocessor : public PreprocessorInterface {
     const size_t processed_bytes_count;
 };
 
-// QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision
-// of uint8_t. The quantization is done by finding the minimum and maximum values of the input
-// vector, and then scaling the values to fit in the range of [0, 255]. The quantized values are
-// then stored in a uint8_t array. [Quantized values, min, delta] Quantized Blob size  =
-// dim_elements * sizeof(int8)  +  2 * sizeof(float) delta = (max_val - min_val) / 255.0f
-// quantized_v[i] = (v[i] - min_val) / delta
-// preprocessForStorage:
-// if null:
-//      - We are not reallocing because it will be released after the query.
-//      Allocate quantized blob size
-// 3. Compute (min, delta) and quantize to the quantized blob or in place.
-// preprocessQuery: No-op – queries arrive as float32 and remain uncompressed
-
+/*
+ * QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision
+    * representation using uint8_t. It stores the quantized values along with metadata (min value and
+    * scaling factor) in a single contiguous blob. The quantized values are then stored in a uint8_t
+    * array. The quantization is done by finding the minimum and maximum values of the input vector,
+    * and then scaling the values to fit in the range of [0, 255]. The quantized values are then
+    * stored in a uint8_t array. The quantized blob size is:
+    * dim_elements * sizeof(int8)  +  2 * sizeof(float)
+*/
 class QuantPreprocessor : public PreprocessorInterface {
 public:
     // Constructor for backward compatibility (single blob size)
@@ -259,7 +254,7 @@ class QuantPreprocessor : public PreprocessorInterface {
     void preprocessForStorage(const void *original_blob, void *&blob,
                               size_t &input_blob_size) const override {
         // Allocate quantized blob if needed
-        if (blob == nullptr) {
+        if (!blob) {
             blob = this->allocator->allocate(storage_bytes_count);
         }
 
@@ -279,7 +274,6 @@ class QuantPreprocessor : public PreprocessorInterface {
     void preprocessQueryInPlace(void *blob, size_t input_blob_size,
                                 unsigned char alignment) const override {
         // No-op: queries remain as float32
-        assert(blob);
     }
 
     void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
@@ -287,11 +281,8 @@ class QuantPreprocessor : public PreprocessorInterface {
         assert(input_blob_size >= storage_bytes_count &&
                "Input buffer too small for in-place quantization");
 
-        // Only quantize in-place if input buffer is large enough
-        if (input_blob_size >= storage_bytes_count) {
-            quantize(static_cast<const float *>(original_blob),
+        quantize(static_cast<const float *>(original_blob),
                      static_cast<uint8_t *>(original_blob));
-        }
     }
 
 private:
diff --git a/tests/unit/test_components.cpp b/tests/unit/test_components.cpp
index 5afe2873d..917167e41 100644
--- a/tests/unit/test_components.cpp
+++ b/tests/unit/test_components.cpp
@@ -1243,11 +1243,10 @@ TEST(PreprocessorsTest, QuantizationInPlaceTest) {
         // Check delta is correct ((5.0f - 1.0f) / 255.0f)
         ASSERT_FLOAT_EQ(delta, 4.0f / 255.0f);
 
-        // Check quantized values
-        for (size_t i = 0; i < dim; i++) {
-            uint8_t expected =
-                static_cast<uint8_t>(std::round((original_data[i] - min_val) / delta));
-            ASSERT_EQ(quantized[i], expected);
+        // dequantize and verify the values
+        for (size_t i = 0; i < dim; ++i) {
+            float dequantized_value = min_val + quantized[i] * delta;
+            ASSERT_NEAR(dequantized_value, original_data[i], 0.01);
         }
     }
 }

From b7aeb2d81c3d37ede7cf1ef9b4dd3aa45c093365 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Thu, 5 Jun 2025 09:54:50 +0300
Subject: [PATCH 21/22] change to input output type

---
 src/VecSim/spaces/computer/preprocessors.h | 52 +++++++++++-----------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 6c5366661..3b58a72fc 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -149,25 +149,27 @@ class CosinePreprocessor : public PreprocessorInterface {
 };
 
 /*
- * QuantPreprocessor is a preprocessor that quantizes the input vector of fp32 to a lower precision
-    * representation using uint8_t. It stores the quantized values along with metadata (min value and
-    * scaling factor) in a single contiguous blob. The quantized values are then stored in a uint8_t
-    * array. The quantization is done by finding the minimum and maximum values of the input vector,
-    * and then scaling the values to fit in the range of [0, 255]. The quantized values are then
-    * stored in a uint8_t array. The quantized blob size is:
-    * dim_elements * sizeof(int8)  +  2 * sizeof(float)
+ * QuantPreprocessor is a preprocessor that quantizes the input vector of INPUT_TYPE (float) to a
+ * lower precision representation using OUTPUT_TYPE (uint8_t). It stores the quantized values along
+ * with metadata (min value and scaling factor) in a single contiguous blob. The quantized values
+ * are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and
+ * maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
+ * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE)  +  2 * sizeof(float)
 */
 class QuantPreprocessor : public PreprocessorInterface {
+    using INPUT_TYPE = float;
+    using OUTPUT_TYPE = uint8_t;
+
 public:
     // Constructor for backward compatibility (single blob size)
     QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
         : PreprocessorInterface(allocator), dim(dim),
-          storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {
+          storage_bytes_count(dim * sizeof(OUTPUT_TYPE) + 2 * sizeof(float)) {
     } // quantized + min + delta
 
     // Helper function to perform quantization. This function is used by both preprocess and
     // preprocessQuery and supports in-place quantization of the storage blob.
-    void quantize(const float *input, uint8_t *quantized) const {
+    void quantize(const INPUT_TYPE *input, OUTPUT_TYPE *quantized) const {
         assert(input && quantized);
         // Find min and max values
         auto [min_val, max_val] = find_min_max(input);
@@ -179,7 +181,7 @@ class QuantPreprocessor : public PreprocessorInterface {
 
         // Quantize the values
         for (size_t i = 0; i < this->dim; i++) {
-            quantized[i] = static_cast<uint8_t>(std::round((input[i] - min_val) * inv_delta));
+            quantized[i] = static_cast<OUTPUT_TYPE>(std::round((input[i] - min_val) * inv_delta));
         }
 
         float *metadata = reinterpret_cast<float *>(quantized + this->dim);
@@ -202,12 +204,12 @@ class QuantPreprocessor : public PreprocessorInterface {
         // CASE 1: STORAGE BLOB NEEDS ALLOCATION
         if (!storage_blob) {
             // Allocate aligned memory for the quantized storage blob
-            storage_blob = static_cast<uint8_t *>(
+            storage_blob = static_cast<OUTPUT_TYPE *>(
                 this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
 
             // Quantize directly from original data
-            const float *input = static_cast<const float *>(original_blob);
-            quantize(input, static_cast<uint8_t *>(storage_blob));
+            const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
+            quantize(input, static_cast<OUTPUT_TYPE *>(storage_blob));
         }
         // CASE 2: STORAGE BLOB EXISTS
         else {
@@ -219,8 +221,8 @@ class QuantPreprocessor : public PreprocessorInterface {
                     this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
 
                 // Quantize from the shared blob (query_blob) to the new storage blob
-                quantize(static_cast<const float *>(query_blob),
-                         static_cast<uint8_t *>(new_storage));
+                quantize(static_cast<const INPUT_TYPE *>(query_blob),
+                         static_cast<OUTPUT_TYPE *>(new_storage));
 
                 // Update storage_blob to point to the new memory
                 storage_blob = new_storage;
@@ -230,20 +232,20 @@ class QuantPreprocessor : public PreprocessorInterface {
                 // Check if storage blob needs resizing
                 if (storage_blob_size < this->storage_bytes_count) {
                     // Allocate new storage with correct size
-                    uint8_t *new_storage = static_cast<uint8_t *>(
+                    OUTPUT_TYPE *new_storage = static_cast<OUTPUT_TYPE *>(
                         this->allocator->allocate_aligned(this->storage_bytes_count, alignment));
 
                     // Quantize from old storage to new storage
-                    quantize(static_cast<const float *>(storage_blob),
-                             static_cast<uint8_t *>(new_storage));
+                    quantize(static_cast<const INPUT_TYPE *>(storage_blob),
+                             static_cast<OUTPUT_TYPE *>(new_storage));
 
                     // Free old storage and update pointer
                     this->allocator->free_allocation(storage_blob);
                     storage_blob = new_storage;
                 } else {
                     // Storage blob is large enough, quantize in-place
-                    quantize(static_cast<const float *>(storage_blob),
-                             static_cast<uint8_t *>(storage_blob));
+                    quantize(static_cast<const INPUT_TYPE *>(storage_blob),
+                             static_cast<OUTPUT_TYPE *>(storage_blob));
                 }
             }
         }
@@ -259,8 +261,8 @@ class QuantPreprocessor : public PreprocessorInterface {
         }
 
         // Cast to appropriate types
-        const float *input = static_cast<const float *>(original_blob);
-        uint8_t *quantized = static_cast<uint8_t *>(blob);
+        const INPUT_TYPE *input = static_cast<const INPUT_TYPE *>(original_blob);
+        OUTPUT_TYPE *quantized = static_cast<OUTPUT_TYPE *>(blob);
         quantize(input, quantized);
 
         input_blob_size = storage_bytes_count;
@@ -281,12 +283,12 @@ class QuantPreprocessor : public PreprocessorInterface {
         assert(input_blob_size >= storage_bytes_count &&
                "Input buffer too small for in-place quantization");
 
-        quantize(static_cast<const float *>(original_blob),
-                     static_cast<uint8_t *>(original_blob));
+        quantize(static_cast<const INPUT_TYPE *>(original_blob),
+                 static_cast<OUTPUT_TYPE *>(original_blob));
     }
 
 private:
-    std::pair<float, float> find_min_max(const float *input) const {
+    std::pair<float, float> find_min_max(const INPUT_TYPE *input) const {
         float min_val = input[0];
         float max_val = input[0];
 

From a8aee990c2aa9c230d5ba9174a87791414686bf1 Mon Sep 17 00:00:00 2001
From: Dor Forer <dor.forer@redis.com>
Date: Thu, 5 Jun 2025 10:14:31 +0300
Subject: [PATCH 22/22] format

---
 src/VecSim/spaces/computer/preprocessors.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
index 3b58a72fc..f981b15a8 100644
--- a/src/VecSim/spaces/computer/preprocessors.h
+++ b/src/VecSim/spaces/computer/preprocessors.h
@@ -155,7 +155,7 @@ class CosinePreprocessor : public PreprocessorInterface {
  * are then stored in an OUTPUT_TYPE array. The quantization is done by finding the minimum and
  * maximum values of the input vector, and then scaling the values to fit in the range of [0, 255].
  * The quantized blob size is: dim_elements * sizeof(OUTPUT_TYPE)  +  2 * sizeof(float)
-*/
+ */
 class QuantPreprocessor : public PreprocessorInterface {
     using INPUT_TYPE = float;
     using OUTPUT_TYPE = uint8_t;