Skip to content

Commit b21d144

Browse files
committed
Enhance comments for major components
Signed-off-by: DwyaneShi <[email protected]>
1 parent 842bedd commit b21d144

File tree

2 files changed

+101
-2
lines changed

2 files changed

+101
-2
lines changed

modules/llm-cache/ds/kv_cache_chunk.h

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,24 @@ namespace vineyard {
3939
// forward declaration
4040
struct LLMKV;
4141

42+
// A KVCacheChunk contains all the KV tensors of a fixed number of
43+
// tokens (i.e., `chunk_size`).
44+
//
45+
// In its object blob, we first store all the KV tensors, and then
46+
// store all the tokens (including prefix tokens and current tokens
47+
// cached in the chunk), which will be used to avoid hash conflicts.
48+
//
49+
// In its metadata, we store the namespace (i.e., `ns_`), which will
50+
// be used as the name prefix of each chunk. Clients can also use the
51+
// namespace to list all the chunks. Access time (i.e., `access_time_`)
52+
// in its metadata is used for the TTL-based global GC. We also have
53+
// the md5sum of all tokens (including prefix tokens and current tokens)
54+
// in its metadata. When we reconstruct a chunk from the object blob
55+
// and metadata, we calculate the md5sum of all tokens in the blob and
56+
// compare it with the md5sum in the metadata. If they are the same,
57+
// we consider the chunk is valid. Otherwise, we consider the chunk is
58+
// corrupted. So far, we skip the md5sum of the tensors to reduce
59+
// the compute overhead.
4260
class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
4361
public:
4462
inline static constexpr char kFieldNameNS[] = "namespace";
@@ -52,12 +70,17 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
5270

5371
private:
5472
std::shared_ptr<Buffer> buffer_;
73+
// number of prefix tokens and current tokens in the chunk
5574
int total_tokens_;
5675
int tensor_nbytes_;
5776
int layer_;
5877
int chunk_size_;
78+
// access time is used for TTL-based global GC
5979
uint64_t access_time_;
80+
// md5sum of all tokens (including prefix tokens and current tokens)
6081
std::string md5_;
82+
// namespace. chunks within the same namespace will be shared
83+
// among different clients
6184
std::string ns_;
6285

6386
public:
@@ -79,23 +102,53 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
79102
friend class KVCacheChunkBuilder;
80103
};
81104

105+
// A KVCacheChunkBuilder is used to build a KVCacheChunk.
106+
//
107+
// We have two kinds of builders:
108+
// 1. The builder to build a new chunk.
109+
// 2. The builder to rebuild a chunk from the object blob and metadata.
110+
//
111+
// For the first kind of builder, `Make` creates an empty chunk and an
112+
// `Update` fills the chunk with KV tensors. After `Update`, the chunk
113+
// is marked as ready and waiting readers will be notified. This kind
114+
// of builder can be sealed to a KVCacheChunk.
115+
//
116+
// For the second kind of builder, `Make` only assigns the chunk id and
117+
// the first `Query` will trigger a construction of the chunk, i.e.,
118+
// constructing the corresponding chunk with fetched metadata and blob.
119+
// After construction, the chunk is marked as ready and other waiting
120+
// readers will be notified. This kind of builder will never be sealed
121+
// since the chunk already exists in the object store.
122+
//
123+
// We also track the access time of the chunk in the builder. Global
124+
// access time is the latest access time of the global object we know.
125+
// Access time is the local access time that is updated by each access.
126+
// The local access time will finally be updated to the global access
127+
// time based on the policy used in AIBrixBlobStorage.
82128
class KVCacheChunkBuilder {
83129
private:
84130
RPCClient& rpc_client_;
85131
std::vector<int> all_tokens_;
86132
std::shared_ptr<RemoteBlobWriter> remote_buffer_writer_ = nullptr;
87133
ObjectID chunk_id_;
88134
std::shared_ptr<Buffer> buffer_ = nullptr;
135+
89136
int total_tokens_;
90137
int tensor_nbytes_;
91138
int layer_;
92139
int chunk_size_;
93140
std::string ns_;
141+
142+
// `time_mu_` protects the access times of the chunk.
94143
std::shared_mutex time_mu_;
95144
uint64_t g_access_time_ = 0;
96145
uint64_t access_time_ = 0;
146+
147+
// `mutex_` and `cv_` are used to block readers until the chunk
148+
// is ready to be read.
97149
std::mutex mutex_;
98150
std::condition_variable cv_;
151+
99152
std::atomic<bool> is_ready_ = false;
100153
std::string md5_;
101154

@@ -140,6 +193,7 @@ class KVCacheChunkBuilder {
140193
return access_time_;
141194
}
142195

196+
// Whether the chunk is ready to be read.
143197
bool IsReady() { return is_ready_; }
144198

145199
std::shared_ptr<Object> Seal();
@@ -150,6 +204,7 @@ class KVCacheChunkBuilder {
150204

151205
void PrintKVCacheChunk();
152206

207+
// Whether the chunk is the same as the chunk with the given metadata.
153208
Status IsSame(const ObjectMeta& meta);
154209

155210
KVCacheChunkBuilder(RPCClient& rpc_client, int tensor_nbytes, int layer,

modules/llm-cache/storage/aibrix_blob_storage.h

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,59 @@ limitations under the License.
3434

3535
namespace vineyard {
3636

37+
// AIBrixBlobStorage is the storage backend of KVCacheChunk.
38+
// It employs the S3-FIFO replacement policy to remain scan-
39+
// resistant and recognize hot chunks. Please refer to member
40+
// variable comments for more details of the S3-FIFO policy.
41+
//
42+
// In our implementation, the Main FIFO list of S3-FIFO is a
43+
// mirror of the global chunk list. New chunks in the Main
44+
// FIFO list will be periodically persisted to the global
45+
// chunk list by the LocalSync function. Persisted chunks
46+
// evicted from the Main FIFO list will be deleted from the
47+
// global chunk list.
48+
//
49+
// Each chunk has an associated name that is generated by
50+
// equation: name = namespace + "_" + hash(hash(previous chunk)
51+
// + tokens of current chunk)
52+
// Please refer to computeChunkHashesForTokens for more details.
53+
//
54+
// Each name is supposed to be unique. For the given prefix tokens
55+
// and query tokens, after generating the chunk names, we will
56+
// use the names to get the corresponding chunks if they exist.
57+
//
58+
// Each global chunk has an associated label called "access_time",
59+
// which indicates the last access time of the chunk. For those
60+
// chunks cached in the local FIFO lists, we will update their
61+
// access time upon each access but only push the access time
62+
// to the global during LocalSync function.
63+
//
64+
// In GlobalGC, we will list all the global chunks within the
65+
// namespace, and check if any chunks reach the TTL. If so, we
66+
// will delete them from the global.
67+
//
68+
// We use a thread pool to perform memory copies in parallel for
69+
// both `Query` and `Update` to speed up the cache. The return
70+
// of `Query` and `Update` indicates the completion of all the
71+
// memory copies and it is safe to reuse the input buffers.
3772
class AIBrixBlobStorage
3873
: public IStorage,
3974
public std::enable_shared_from_this<AIBrixBlobStorage> {
4075
private:
76+
// Max number of tokens supported by the cache. If the total
77+
// number of prefix tokens and current tokens of an update
78+
// exceeds the max tokens, we will drop the update.
4179
static constexpr int kMaxTokensPerSeq = 64 * 1024;
4280
static constexpr double kSmallFifoCapacityRatio = 0.3;
81+
// The preferred number of evicted items for each eviction of
82+
// the Main FIFO list to amortize the cost of deleting from
83+
// the object store.
4384
static constexpr int kMinEviction = 32;
4485

4586
RPCClient& rpc_client_;
4687
Client& ipc_client_;
4788

89+
// hash algorithm and hasher used to generate chunk hashes
4890
std::shared_ptr<IHashAlgorithm> hash_alg_;
4991
std::shared_ptr<Hasher> hasher_;
5092

@@ -53,6 +95,8 @@ class AIBrixBlobStorage
5395
int chunk_size_;
5496
int capacity_;
5597
size_t chunk_obj_size_;
98+
// namespace. chunks within the same namespace will be shared
99+
// among different clients
56100
std::string kv_cache_ns_;
57101

58102
// intervals in seconds
@@ -61,6 +105,7 @@ class AIBrixBlobStorage
61105
// TTL in seconds
62106
std::chrono::duration<int64_t> global_ttl_s_;
63107

108+
// indicates whether the cache is closed
64109
bool exit_flag_ = false;
65110

66111
// global GC is carried out in the global GC thread.
@@ -120,8 +165,6 @@ class AIBrixBlobStorage
120165
EvictingCacheMap<std::string, FifoEntry>
121166
main_fifo_; // mirror of global chunk list
122167

123-
std::vector<ObjectID> evict_list_;
124-
125168
public:
126169
AIBrixBlobStorage(RPCClient& rpc_client, Client& ipc_client,
127170
size_t tensor_nbytes, int capacity, int layer,
@@ -196,6 +239,7 @@ class AIBrixBlobStorage
196239
std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kv_tensors,
197240
size_t& matched);
198241

242+
// Seal and persist the chunk, and then put the given name for the chunk.
199243
Status SealAndPersist(
200244
const std::string& name,
201245
const std::shared_ptr<KVCacheChunkBuilder>& chunk_builder,

0 commit comments

Comments
 (0)