diff --git a/nanovllm/engine/block_manager.py b/nanovllm/engine/block_manager.py index 65d725e4..09b88249 100644 --- a/nanovllm/engine/block_manager.py +++ b/nanovllm/engine/block_manager.py @@ -69,11 +69,11 @@ def allocate(self, seq: Sequence): if cache_miss: block_id = self.free_block_ids[0] block = self._allocate_block(block_id) - else: - seq.num_cached_tokens += self.block_size + else: if block_id in self.used_block_ids: block = self.blocks[block_id] block.ref_count += 1 + seq.num_cached_tokens += self.block_size else: block = self._allocate_block(block_id) if h != -1: