File tree Expand file tree Collapse file tree 8 files changed +59
-54
lines changed Expand file tree Collapse file tree 8 files changed +59
-54
lines changed Original file line number Diff line number Diff line change 11# SPDX-License-Identifier: Apache-2.0
22# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
4+ import enum
45from abc import ABC , abstractmethod
56from collections .abc import AsyncGenerator , Iterable , Mapping
67from typing import Any
1516from vllm .sampling_params import SamplingParams
1617from vllm .tasks import SupportedTask
1718from vllm .transformers_utils .tokenizer import AnyTokenizer
18- from vllm .utils import Device
1919from vllm .v1 .engine import EngineCoreRequest
2020from vllm .v1 .engine .processor import Processor
2121
2222logger = init_logger (__name__ )
2323
2424
class Device(enum.Enum):
    """Enumeration of device kinds (GPU and CPU).

    Member values are assigned by ``enum.auto()``, so they are consecutive
    integers in declaration order. Compare members by identity
    (``device is Device.GPU``) rather than by their numeric value.
    """

    GPU = enum.auto()
    CPU = enum.auto()
28+
29+
2530class EngineClient (ABC ):
2631 """Protocol class for Clients to Engine"""
2732
Original file line number Diff line number Diff line change 3131 TokenizerMode ,
3232)
3333from vllm .engine .arg_utils import EngineArgs
34+ from vllm .engine .protocol import Device
3435from vllm .entrypoints .chat_utils import (
3536 ChatCompletionMessageParam ,
3637 ChatTemplateContentFormatOption ,
7576 get_cached_tokenizer ,
7677)
7778from vllm .usage .usage_lib import UsageContext
78- from vllm .utils import Counter , Device
7979from vllm .utils .collection_utils import as_iter , is_list_of
80+ from vllm .utils .counter import Counter
8081from vllm .v1 .engine import EngineCoreRequest
8182from vllm .v1 .engine .llm_engine import LLMEngine
8283from vllm .v1 .sample .logits_processor import LogitsProcessor
@@ -1490,8 +1491,8 @@ def start_profile(self) -> None:
14901491 def stop_profile (self ) -> None :
14911492 self .llm_engine .stop_profile ()
14921493
1493- def reset_prefix_cache (self , device : Device | None = None ) -> bool :
1494- return self .llm_engine .reset_prefix_cache (device )
1494+ def reset_prefix_cache (self , device : Device | None = None ) -> None :
1495+ self .llm_engine .reset_prefix_cache (device )
14951496
14961497 def sleep (self , level : int = 1 ):
14971498 """
Original file line number Diff line number Diff line change 4040import vllm .envs as envs
4141from vllm .config import VllmConfig
4242from vllm .engine .arg_utils import AsyncEngineArgs
43- from vllm .engine .protocol import EngineClient
43+ from vllm .engine .protocol import Device , EngineClient
4444from vllm .entrypoints .launcher import serve_http
4545from vllm .entrypoints .logger import RequestLogger
4646from vllm .entrypoints .openai .cli_args import make_arg_parser , validate_parsed_serve_args
108108from vllm .logger import init_logger
109109from vllm .reasoning import ReasoningParserManager
110110from vllm .usage .usage_lib import UsageContext
111- from vllm .utils import Device
112111from vllm .utils .argparse_utils import FlexibleArgumentParser
113112from vllm .utils .network_utils import is_valid_ipv6_address
114113from vllm .utils .system_utils import decorate_logs , set_ulimit
Original file line number Diff line number Diff line change 1919from vllm .logger import init_logger
2020from vllm .lora .request import LoRARequest
2121from vllm .lora .resolver import LoRAResolver , LoRAResolverRegistry
22- from vllm .utils import AtomicCounter
22+ from vllm .utils . counter import AtomicCounter
2323
2424logger = init_logger (__name__ )
2525
Original file line number Diff line number Diff line change 33
44import enum
55import inspect
6- import threading
76import uuid
87import warnings
98from functools import wraps
@@ -68,54 +67,11 @@ def __dir__() -> list[str]:
6867T = TypeVar ("T" )
6968
7069
71- class Device (enum .Enum ):
72- GPU = enum .auto ()
73- CPU = enum .auto ()
74-
75-
7670class LayerBlockType (enum .Enum ):
7771 attention = "attention"
7872 mamba = "mamba"
7973
8074
81- class Counter :
82- def __init__ (self , start : int = 0 ) -> None :
83- self .counter = start
84-
85- def __next__ (self ) -> int :
86- i = self .counter
87- self .counter += 1
88- return i
89-
90- def reset (self ) -> None :
91- self .counter = 0
92-
93-
94- class AtomicCounter :
95- """An atomic, thread-safe counter"""
96-
97- def __init__ (self , initial = 0 ):
98- """Initialize a new atomic counter to given initial value"""
99- self ._value = initial
100- self ._lock = threading .Lock ()
101-
102- def inc (self , num = 1 ):
103- """Atomically increment the counter by num and return the new value"""
104- with self ._lock :
105- self ._value += num
106- return self ._value
107-
108- def dec (self , num = 1 ):
109- """Atomically decrement the counter by num and return the new value"""
110- with self ._lock :
111- self ._value -= num
112- return self ._value
113-
114- @property
115- def value (self ):
116- return self ._value
117-
118-
11975def random_uuid () -> str :
12076 return str (uuid .uuid4 ().hex )
12177
Original file line number Diff line number Diff line change 1+ # SPDX-License-Identifier: Apache-2.0
2+ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+ import threading
4+
5+
class Counter:
    """A simple, non-thread-safe, monotonically increasing integer counter.

    Each call to ``next(counter)`` returns the current value and then
    advances the counter by one.
    """

    def __init__(self, start: int = 0) -> None:
        """Create a counter whose first ``next()`` result is ``start``."""
        super().__init__()

        self.counter = start

    def __iter__(self) -> "Counter":
        """Return ``self`` so the counter works with ``for``/``zip``/itertools.

        The iteration never terminates on its own; callers must bound it.
        """
        return self

    def __next__(self) -> int:
        """Return the current value, then increment the counter by one."""
        i = self.counter
        self.counter += 1
        return i

    def reset(self) -> None:
        """Reset the counter to 0 (not to the ``start`` given at construction)."""
        self.counter = 0

    def __repr__(self) -> str:
        return f"{type(self).__name__}(counter={self.counter!r})"
20+
class AtomicCounter:
    """An atomic, thread-safe counter.

    ``inc`` and ``dec`` perform their read-modify-write under a
    ``threading.Lock``; ``value`` is a plain read of the current value.
    """

    def __init__(self, initial: int = 0) -> None:
        """Initialize a new atomic counter to given initial value."""
        super().__init__()

        self._value = initial
        self._lock = threading.Lock()

    @property
    def value(self) -> int:
        """The current counter value (read without taking the lock)."""
        return self._value

    def inc(self, num: int = 1) -> int:
        """Atomically increment the counter by num and return the new value."""
        with self._lock:
            self._value += num
            return self._value

    def dec(self, num: int = 1) -> int:
        """Atomically decrement the counter by num and return the new value."""
        with self._lock:
            self._value -= num
            return self._value

    def __repr__(self) -> str:
        return f"{type(self).__name__}(value={self._value!r})"
Original file line number Diff line number Diff line change 1414import vllm .envs as envs
1515from vllm .config import VllmConfig
1616from vllm .engine .arg_utils import AsyncEngineArgs
17- from vllm .engine .protocol import EngineClient
17+ from vllm .engine .protocol import Device , EngineClient
1818from vllm .entrypoints .utils import _validate_truncation_size
1919from vllm .inputs import PromptType
2020from vllm .logger import init_logger
2929from vllm .transformers_utils .config import maybe_register_config_serialize_by_value
3030from vllm .transformers_utils .tokenizer import AnyTokenizer , init_tokenizer_from_configs
3131from vllm .usage .usage_lib import UsageContext
32- from vllm .utils import Device
3332from vllm .utils .async_utils import cancel_task_threadsafe
3433from vllm .utils .collection_utils import as_list
3534from vllm .utils .func_utils import deprecate_kwargs
Original file line number Diff line number Diff line change 1414from vllm .distributed import stateless_destroy_torch_distributed_process_group
1515from vllm .distributed .parallel_state import get_dp_group
1616from vllm .engine .arg_utils import EngineArgs
17+ from vllm .engine .protocol import Device
1718from vllm .inputs import PromptType
1819from vllm .logger import init_logger
1920from vllm .lora .request import LoRARequest
2627from vllm .tracing import init_tracer
2728from vllm .transformers_utils .tokenizer import AnyTokenizer , init_tokenizer_from_configs
2829from vllm .usage .usage_lib import UsageContext
29- from vllm .utils import Device
3030from vllm .v1 .engine import EngineCoreRequest
3131from vllm .v1 .engine .core_client import EngineCoreClient
3232from vllm .v1 .engine .output_processor import OutputProcessor
You can’t perform that action at this time.
0 commit comments