|
42 | 42 | TGISStatLogger,
|
43 | 43 | )
|
44 | 44 |
|
| 45 | +from .adapters import AdapterStore, validate_adapters |
45 | 46 | from .pb import generation_pb2_grpc
|
46 | 47 | from .pb.generation_pb2 import DESCRIPTOR as _GENERATION_DESCRIPTOR
|
47 | 48 | from .pb.generation_pb2 import (
|
|
56 | 57 | )
|
57 | 58 | from .validation import validate_input, validate_params
|
58 | 59 |
|
59 |
| -try: |
60 |
| - from .adapters import AdapterStore, validate_adapters |
61 |
| -except ImportError: |
62 |
| - adapters_available = False |
63 |
| -else: |
64 |
| - adapters_available = True |
65 |
| - |
66 |
| - |
67 | 60 | if TYPE_CHECKING:
|
68 | 61 | import argparse
|
69 | 62 | from collections.abc import AsyncIterator, MutableSequence
|
|
76 | 69 | from vllm.lora.request import LoRARequest
|
77 | 70 | from vllm.sequence import Logprob
|
78 | 71 |
|
| 72 | + from .adapters import PromptAdapterRequest |
79 | 73 | from .pb.generation_pb2 import (
|
80 | 74 | BatchedGenerationRequest,
|
81 | 75 | BatchedTokenizeRequest,
|
@@ -224,11 +218,7 @@ async def Generate(
|
224 | 218 | start_time = time.time()
|
225 | 219 | service_metrics.count_generate_request(len(request.requests))
|
226 | 220 | request_id = self.request_id(context)
|
227 |
| - adapter_kwargs = ( |
228 |
| - await self._validate_adapters(request, context) |
229 |
| - if adapters_available |
230 |
| - else {} |
231 |
| - ) |
| 221 | + adapter_kwargs = await self._validate_adapters(request, context) |
232 | 222 | tokenizer = await self._get_tokenizer(adapter_kwargs)
|
233 | 223 |
|
234 | 224 | sampling_params, deadline = await self._validate_and_convert_params(
|
@@ -326,11 +316,7 @@ async def GenerateStream(
|
326 | 316 | start_time = time.time()
|
327 | 317 | service_metrics.count_generate_request()
|
328 | 318 | request_id = self.request_id(context)
|
329 |
| - adapter_kwargs = ( |
330 |
| - await self._validate_adapters(request, context) |
331 |
| - if adapters_available |
332 |
| - else {} |
333 |
| - ) |
| 319 | + adapter_kwargs = await self._validate_adapters(request, context) |
334 | 320 | tokenizer = await self._get_tokenizer(adapter_kwargs)
|
335 | 321 |
|
336 | 322 | sampling_params, deadline = await self._validate_and_convert_params(
|
|
0 commit comments