Enhance comments for major components

DwyaneShi · DwyaneShi · commit b21d14491515 · 2024-11-20T14:26:42.000-08:00
Signed-off-by: DwyaneShi <dwyane.shi@gmail.com>
diff --git a/modules/llm-cache/ds/kv_cache_chunk.h b/modules/llm-cache/ds/kv_cache_chunk.h
@@ -39,6 +39,24 @@ namespace vineyard {
 // forward declaration
 struct LLMKV;
 
+// A KVCacheChunk contains all the KV tensors of a fixed number of
+// tokens (i.e., `chunk_size`).
+//
+// In its object blob, we first store all the KV tensors, and then
+// store all the tokens (including prefix tokens and current tokens
+// cached in the chunk), which will be used to avoid hash conflicts.
+//
+// In its metadata, we store the namespace (i.e., `ns_`), which will
+// be used as the name prefix of each chunk. Clients can also use the
+// namespace to list all the chunks. Access time (i.e., `access_time_`)
+// in its metadata is used for the TTL-based global GC. We also have
+// the md5sum of all tokens (including prefix tokens and current tokens)
+// in its metadata. When we reconstruct a chunk from the object blob
+// and metadata, we calculate the md5sum of all tokens in the blob and
+// compare it with the md5sum in the metadata. If they are the same,
+// we consider the chunk valid. Otherwise, we consider the chunk
+// corrupted. So far, we do not compute the md5sum of the tensors,
+// in order to avoid the compute overhead.
 class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
  public:
   inline static constexpr char kFieldNameNS[] = "namespace";
@@ -52,12 +70,17 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
 
  private:
   std::shared_ptr<Buffer> buffer_;
+  // number of prefix tokens and current tokens in the chunk
   int total_tokens_;
   int tensor_nbytes_;
   int layer_;
   int chunk_size_;
+  // access time is used for TTL-based global GC
   uint64_t access_time_;
+  // md5sum of all tokens (including prefix tokens and current tokens)
   std::string md5_;
+  // namespace. chunks within the same namespace will be shared
+  // among different clients
   std::string ns_;
 
  public:
@@ -79,23 +102,53 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
   friend class KVCacheChunkBuilder;
 };
 
+// A KVCacheChunkBuilder is used to build a KVCacheChunk.
+//
+// We have two kinds of builders:
+// 1. The builder to build a new chunk.
+// 2. The builder to rebuild a chunk from the object blob and metadata.
+//
+// For the first kind of builder, `Make` creates an empty chunk and
+// `Update` fills the chunk with KV tensors. After `Update`, the chunk
+// is marked as ready and waiting readers will be notified. This kind
+// of builder can be sealed to a KVCacheChunk.
+//
+// For the second kind of builder, `Make` only assigns the chunk id and
+// the first `Query` will trigger a construction of the chunk, i.e.,
+// constructing the corresponding chunk with fetched metadata and blob.
+// After construction, the chunk is marked as ready and other waiting
+// readers will be notified. This kind of builder will never be sealed
+// since the chunk already exists in the object store.
+//
+// We also track the access time of the chunk in the builder. Global
+// access time is the latest access time of the global object we know.
+// Access time is the local access time that is updated by each access.
+// The local access time will finally be updated to the global access
+// time based on the policy used in AIBrixBlobStorage.
 class KVCacheChunkBuilder {
  private:
   RPCClient& rpc_client_;
   std::vector<int> all_tokens_;
   std::shared_ptr<RemoteBlobWriter> remote_buffer_writer_ = nullptr;
   ObjectID chunk_id_;
   std::shared_ptr<Buffer> buffer_ = nullptr;
+
   int total_tokens_;
   int tensor_nbytes_;
   int layer_;
   int chunk_size_;
   std::string ns_;
+
+  // `time_mu_` protects the access times of the chunk.
   std::shared_mutex time_mu_;
   uint64_t g_access_time_ = 0;
   uint64_t access_time_ = 0;
+
+  // `mutex_` and `cv_` are used to block readers until the chunk
+  // is ready to be read.
   std::mutex mutex_;
   std::condition_variable cv_;
+
   std::atomic<bool> is_ready_ = false;
   std::string md5_;
 
@@ -140,6 +193,7 @@ class KVCacheChunkBuilder {
     return access_time_;
   }
 
+  // Whether the chunk is ready to be read.
   bool IsReady() { return is_ready_; }
 
   std::shared_ptr<Object> Seal();
@@ -150,6 +204,7 @@ class KVCacheChunkBuilder {
 
   void PrintKVCacheChunk();
 
+  // Whether the chunk is the same as the chunk with the given metadata.
   Status IsSame(const ObjectMeta& meta);
 
   KVCacheChunkBuilder(RPCClient& rpc_client, int tensor_nbytes, int layer,
diff --git a/modules/llm-cache/storage/aibrix_blob_storage.h b/modules/llm-cache/storage/aibrix_blob_storage.h
@@ -34,17 +34,59 @@ limitations under the License.
 
 namespace vineyard {
 
+// AIBrixBlobStorage is the storage backend of KVCacheChunk.
+// It employs the S3-FIFO replacement policy to remain scan-
+// resistant and recognize hot chunks. Please refer to member
+// variable comments for more details of the S3-FIFO policy.
+//
+// In our implementation, the Main FIFO list of S3-FIFO is a
+// mirror of the global chunk list. New chunks in the Main
+// FIFO list will be periodically persisted to the global
+// chunk list by the LocalSync function. Persisted chunks
+// evicted from the Main FIFO list will be deleted from the
+// global chunk list.
+//
+// Each chunk has an associated name that is generated by
+// equation: name = namespace + "_" + hash(hash(previous chunk)
+//                  + tokens of current chunk)
+// Please refer to computeChunkHashesForTokens for more details.
+//
+// Each name is supposed to be unique. Given the prefix tokens
+// and query tokens, we first generate the chunk names and then
+// use the names to get the corresponding chunks if they exist.
+//
+// Each global chunk has an associated label called "access_time",
+// which indicates the last access time of the chunk. For those
+// chunks cached in the local FIFO lists, we will update their
+// access time upon each access but only push the access time
+// to the global chunk during the LocalSync function.
+//
+// In GlobalGC, we will list all the global chunks within the
+// namespace, and check if any chunks reach the TTL. If so, we
+// will delete them from the global chunk list.
+//
+// We use a thread pool to perform memory copies in parallel for
+// both `Query` and `Update` to speed up the cache. The return
+// of `Query` and `Update` indicates the completion of all the
+// memory copies and it is safe to reuse the input buffers.
 class AIBrixBlobStorage
     : public IStorage,
       public std::enable_shared_from_this<AIBrixBlobStorage> {
  private:
+  // Max number of tokens supported by the cache. If the total
+  // number of prefix tokens and current tokens of an update
+  // exceeds the max tokens, we will drop the update.
   static constexpr int kMaxTokensPerSeq = 64 * 1024;
   static constexpr double kSmallFifoCapacityRatio = 0.3;
+  // The preferred number of evicted items for each eviction of
+  // the Main FIFO list to amortize the cost of deleting from
+  // the object store.
   static constexpr int kMinEviction = 32;
 
   RPCClient& rpc_client_;
   Client& ipc_client_;
 
+  // hash algorithm and hasher used to generate chunk hashes
   std::shared_ptr<IHashAlgorithm> hash_alg_;
   std::shared_ptr<Hasher> hasher_;
 
@@ -53,6 +95,8 @@ class AIBrixBlobStorage
   int chunk_size_;
   int capacity_;
   size_t chunk_obj_size_;
+  // namespace. chunks within the same namespace will be shared
+  // among different clients
   std::string kv_cache_ns_;
 
   // intervals in seconds
@@ -61,6 +105,7 @@ class AIBrixBlobStorage
   // TTL in seconds
   std::chrono::duration<int64_t> global_ttl_s_;
 
+  // indicates whether the cache is closed
   bool exit_flag_ = false;
 
   // global GC is carried out in the global GC thread.
@@ -120,8 +165,6 @@ class AIBrixBlobStorage
   EvictingCacheMap<std::string, FifoEntry>
       main_fifo_;  // mirror of global chunk list
 
-  std::vector<ObjectID> evict_list_;
-
  public:
   AIBrixBlobStorage(RPCClient& rpc_client, Client& ipc_client,
                     size_t tensor_nbytes, int capacity, int layer,
@@ -196,6 +239,7 @@ class AIBrixBlobStorage
       std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kv_tensors,
       size_t& matched);
 
+  // Seal and persist the chunk, and then put the given name for the chunk.
   Status SealAndPersist(
       const std::string& name,
       const std::shared_ptr<KVCacheChunkBuilder>& chunk_builder,