1
- from typing import Dict , List , Optional
1
+ from typing import Dict , FrozenSet , List , Optional
2
2
3
- from vllm .core .block .interfaces import (Block , BlockAllocator ,
3
+ from vllm .core .block .interfaces import (Block , BlockAllocator , BlockId ,
4
4
DeviceAwareBlockAllocator )
5
5
from vllm .core .block .naive_block import NaiveBlock , NaiveBlockAllocator
6
6
from vllm .core .block .prefix_caching_block import PrefixCachingBlockAllocator
@@ -57,15 +57,15 @@ def create(
57
57
cpu_block_ids = block_ids [num_gpu_blocks :]
58
58
59
59
if allocator_type == "naive" :
60
- gpu_allocator = NaiveBlockAllocator (
61
- create_block = NaiveBlock ,
60
+ gpu_allocator : BlockAllocator = NaiveBlockAllocator (
61
+ create_block = NaiveBlock , # type: ignore
62
62
num_blocks = num_gpu_blocks ,
63
63
block_size = block_size ,
64
64
block_ids = gpu_block_ids ,
65
65
)
66
66
67
- cpu_allocator = NaiveBlockAllocator (
68
- create_block = NaiveBlock ,
67
+ cpu_allocator : BlockAllocator = NaiveBlockAllocator (
68
+ create_block = NaiveBlock , # type: ignore
69
69
num_blocks = num_cpu_blocks ,
70
70
block_size = block_size ,
71
71
block_ids = cpu_block_ids ,
@@ -105,13 +105,14 @@ def __init__(
105
105
Device .GPU : gpu_block_allocator ,
106
106
}
107
107
108
- self ._block_ids_to_allocator = {}
108
+ self ._block_ids_to_allocator : Dict [ int , BlockAllocator ] = {}
109
109
for _ , allocator in self ._allocators .items ():
110
110
for block_id in allocator .all_block_ids :
111
111
self ._block_ids_to_allocator [block_id ] = allocator
112
112
113
- def allocate_mutable (self , prev_block : Optional [Block ],
114
- device : Device ) -> Block :
113
+ def allocate_mutable (self ,
114
+ prev_block : Optional [Block ],
115
+ device : Optional [Device ] = None ) -> Block :
115
116
"""Allocates a new mutable block on the specified device.
116
117
117
118
Args:
@@ -122,10 +123,13 @@ def allocate_mutable(self, prev_block: Optional[Block],
122
123
Returns:
123
124
Block: The newly allocated mutable block.
124
125
"""
126
+ assert device is not None
125
127
return self ._allocators [device ].allocate_mutable (prev_block )
126
128
127
- def allocate_immutable (self , prev_block : Optional [Block ],
128
- token_ids : List [int ], device : Device ) -> Block :
129
+ def allocate_immutable (self ,
130
+ prev_block : Optional [Block ],
131
+ token_ids : List [int ],
132
+ device : Optional [Device ] = None ) -> Block :
129
133
"""Allocates a new immutable block with the provided token IDs on the
130
134
specified device.
131
135
@@ -140,6 +144,7 @@ def allocate_immutable(self, prev_block: Optional[Block],
140
144
Block: The newly allocated immutable block containing the provided
141
145
token IDs.
142
146
"""
147
+ assert device is not None
143
148
return self ._allocators [device ].allocate_immutable (
144
149
prev_block , token_ids )
145
150
@@ -149,7 +154,9 @@ def free(self, block: Block) -> None:
149
154
Args:
150
155
block (Block): The block to be freed.
151
156
"""
152
- allocator = self ._block_ids_to_allocator [block .block_id ]
157
+ block_id = block .block_id
158
+ assert block_id is not None
159
+ allocator = self ._block_ids_to_allocator [block_id ]
153
160
return allocator .free (block )
154
161
155
162
def fork (self , last_block : Block ) -> List [Block ]:
@@ -163,19 +170,22 @@ def fork(self, last_block: Block) -> List[Block]:
163
170
List[Block]: A new list of blocks that shares the same memory as the
164
171
original sequence.
165
172
"""
166
- allocator = self ._block_ids_to_allocator [last_block .block_id ]
173
+ block_id = last_block .block_id
174
+ assert block_id is not None
175
+ allocator = self ._block_ids_to_allocator [block_id ]
167
176
return allocator .fork (last_block )
168
177
169
- def get_num_free_blocks (self , device : Device ) -> int :
178
+ def get_num_free_blocks (self , device : Optional [ Device ] = None ) -> int :
170
179
"""Returns the number of free blocks available on the specified device.
171
180
172
181
Args:
173
182
device (Device): The device for which to query the number of free
174
- blocks.
183
+ blocks. AssertionError is raised if None is passed.
175
184
176
185
Returns:
177
186
int: The number of free blocks available on the specified device.
178
187
"""
188
+ assert device is not None
179
189
return self ._allocators [device ].get_num_free_blocks ()
180
190
181
191
def clear_copy_on_writes (self ) -> Dict [int , List [int ]]:
@@ -210,5 +220,12 @@ def get_common_computed_block_ids(
210
220
return self ._allocators [device ].get_common_computed_block_ids (
211
221
seq_block_ids )
212
222
213
- def all_block_ids (self ) -> frozenset [int ]:
223
+ @property
224
+ def all_block_ids (self ) -> FrozenSet [int ]:
214
225
return frozenset (self ._block_ids_to_allocator .keys ())
226
+
227
+ def promote_to_immutable_block (self , block : Block ) -> BlockId :
228
+ raise NotImplementedError
229
+
230
+ def cow_block_if_not_appendable (self , block : Block ) -> Optional [BlockId ]:
231
+ raise NotImplementedError
0 commit comments