Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions nanovllm/engine/block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ def may_append(self, seq: Sequence):
self._allocate_block(block_id)
block_table.append(block_id)
elif len(seq) % self.block_size == 0:
assert last_block.hash == -1
token_ids = seq.block(seq.num_blocks-1)
prefix = self.blocks[block_table[-2]].hash if len(block_table) > 1 else -1
h = self.compute_hash(token_ids, prefix)
last_block.update(h, token_ids)
self.hash_to_block_id[h] = last_block.block_id
if last_block.hash == -1:
token_ids = seq.block(seq.num_blocks-1)
prefix = self.blocks[block_table[-2]].hash if len(block_table) > 1 else -1
h = self.compute_hash(token_ids, prefix)
last_block.update(h, token_ids)
self.hash_to_block_id[h] = last_block.block_id
else:
assert last_block.hash == -1
8 changes: 5 additions & 3 deletions nanovllm/engine/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ def schedule(self) -> tuple[list[Sequence], bool]:
seq = self.waiting[0]
if num_batched_tokens + len(seq) > self.max_num_batched_tokens or not self.block_manager.can_allocate(seq):
break
num_seqs += 1
self.block_manager.allocate(seq)
num_batched_tokens += len(seq) - seq.num_cached_tokens
seq.status = SequenceStatus.RUNNING
self.waiting.popleft()
self.running.append(seq)
scheduled_seqs.append(seq)
tokens_to_compute=len(seq) - seq.num_cached_tokens
if tokens_to_compute > 0:
num_seqs += 1
scheduled_seqs.append(seq)
num_batched_tokens += tokens_to_compute
if scheduled_seqs:
return scheduled_seqs, True

Expand Down