@@ -39,6 +39,24 @@ namespace vineyard {
39
39
// forward declaration
40
40
struct LLMKV ;
41
41
42
+ // A KVCacheChunk contains all the KV tensors of a fixed number of
43
+ // tokens (i.e., `chunk_size`).
44
+ //
45
+ // In its object blob, we first store all the KV tensors, and then
46
+ // store all the tokens (including prefix tokens and current tokens
47
+ // cached in the chunk), which will be used to avoid hash conflicts.
48
+ //
49
+ // In its metadata, we store the namespace (i.e., `ns_`), which will
50
+ // be used as the name prefix of each chunk. Clients can also use the
51
+ // namespace to list all the chunks. Access time (i.e., `access_time_`)
52
+ // in its metadata is used for the TTL-based global GC. We also have
53
+ // the md5sum of all tokens (including prefix tokens and current tokens)
54
+ // in its metadata. When we reconstruct a chunk from the object blob
55
+ // and metadata, we calculate the md5sum of all tokens in the blob and
56
+ // compare it with the md5sum in the metadata. If they are the same,
57
+ // we consider the chunk is valid. Otherwise, we consider the chunk is
58
+ // corrupted. So far, we don't use the md5sum of the tensors to alleviate
59
+ // the compute overhead.
42
60
class KVCacheChunk : public vineyard ::Registered<KVCacheChunk> {
43
61
public:
44
62
inline static constexpr char kFieldNameNS [] = " namespace" ;
@@ -52,12 +70,17 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
52
70
53
71
private:
54
72
std::shared_ptr<Buffer> buffer_;
73
+ // number of prefix tokens and current tokens in the chunk
55
74
int total_tokens_;
56
75
int tensor_nbytes_;
57
76
int layer_;
58
77
int chunk_size_;
78
+ // access time is used for TTL-based global GC
59
79
uint64_t access_time_;
80
+ // md5sum of all tokens (including prefix tokens and current tokens)
60
81
std::string md5_;
82
+ // namespace. chunks within the same namespace will be shared
83
+ // among different clients
61
84
std::string ns_;
62
85
63
86
public:
@@ -79,23 +102,53 @@ class KVCacheChunk : public vineyard::Registered<KVCacheChunk> {
79
102
friend class KVCacheChunkBuilder ;
80
103
};
81
104
105
+ // A KVCacheChunkBuilder is used to build a KVCacheChunk.
106
+ //
107
+ // We have two kinds of builders:
108
+ // 1. The builder to build a new chunk.
109
+ // 2. The builder to rebuild a chunk from the object blob and metadata.
110
+ //
111
+ // For the first kind of builder, `Make` creates an empty chunk and an
112
+ // `Update` fills the chunk with KV tensors. After `Update`, the chunk
113
+ // is marked as ready and waiting readers will be notified. This kind
114
+ // of builder can be sealed to a KVCacheChunk.
115
+ //
116
+ // For the second kind of builder, `Make` only assigns the chunk id and
117
+ // the first `Query` will trigger a construction of the chunk, i.e.,
118
+ // constructing the corresponding chunk with fetched metadata and blob.
119
+ // After construction, the chunk is marked as ready and other waiting
120
+ // readers will be notified. This kind of builder will never be sealed
121
+ // since the chunk already exists in the object store.
122
+ //
123
+ // We also track the access time of the chunk in the builder. Global
124
+ // access time is the latest access time of the global object we know.
125
+ // Access time is the local access time that is updated by each access.
126
+ // The local access time will finally be updated to the global access
127
+ // time based on the policy used in AIBrixBlobStorage.
82
128
class KVCacheChunkBuilder {
83
129
private:
84
130
RPCClient& rpc_client_;
85
131
std::vector<int > all_tokens_;
86
132
std::shared_ptr<RemoteBlobWriter> remote_buffer_writer_ = nullptr ;
87
133
ObjectID chunk_id_;
88
134
std::shared_ptr<Buffer> buffer_ = nullptr ;
135
+
89
136
int total_tokens_;
90
137
int tensor_nbytes_;
91
138
int layer_;
92
139
int chunk_size_;
93
140
std::string ns_;
141
+
142
+ // `time_mu_` protects the access times of the chunk.
94
143
std::shared_mutex time_mu_;
95
144
uint64_t g_access_time_ = 0 ;
96
145
uint64_t access_time_ = 0 ;
146
+
147
+ // `mutex_` and `cv_` are used to block readers until the chunk
148
+ // is ready to be read.
97
149
std::mutex mutex_;
98
150
std::condition_variable cv_;
151
+
99
152
std::atomic<bool > is_ready_ = false ;
100
153
std::string md5_;
101
154
@@ -140,6 +193,7 @@ class KVCacheChunkBuilder {
140
193
return access_time_;
141
194
}
142
195
196
+ // Whether the chunk is ready to be read.
143
197
// Returns the current value of the atomic `is_ready_` flag, read with
// the default memory ordering of `std::atomic<bool>::load()`.
bool IsReady () { return is_ready_.load(); }
144
198
145
199
std::shared_ptr<Object> Seal ();
@@ -150,6 +204,7 @@ class KVCacheChunkBuilder {
150
204
151
205
void PrintKVCacheChunk ();
152
206
207
+ // Whether the chunk is the same as the chunk with the given metadata.
153
208
Status IsSame (const ObjectMeta& meta);
154
209
155
210
KVCacheChunkBuilder (RPCClient& rpc_client, int tensor_nbytes, int layer,
0 commit comments