Skip to content

Commit f58d9b6

Browse files
[Misc] Separate out utils.counter and move utils.Device to engine (#27588)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 44b5ce9 commit f58d9b6

File tree

8 files changed

+59
-54
lines changed

8 files changed

+59
-54
lines changed

vllm/engine/protocol.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import enum
45
from abc import ABC, abstractmethod
56
from collections.abc import AsyncGenerator, Iterable, Mapping
67
from typing import Any
@@ -15,13 +16,17 @@
1516
from vllm.sampling_params import SamplingParams
1617
from vllm.tasks import SupportedTask
1718
from vllm.transformers_utils.tokenizer import AnyTokenizer
18-
from vllm.utils import Device
1919
from vllm.v1.engine import EngineCoreRequest
2020
from vllm.v1.engine.processor import Processor
2121

2222
logger = init_logger(__name__)
2323

2424

25+
class Device(enum.Enum):
26+
GPU = enum.auto()
27+
CPU = enum.auto()
28+
29+
2530
class EngineClient(ABC):
2631
"""Protocol class for Clients to Engine"""
2732

vllm/entrypoints/llm.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
TokenizerMode,
3232
)
3333
from vllm.engine.arg_utils import EngineArgs
34+
from vllm.engine.protocol import Device
3435
from vllm.entrypoints.chat_utils import (
3536
ChatCompletionMessageParam,
3637
ChatTemplateContentFormatOption,
@@ -75,8 +76,8 @@
7576
get_cached_tokenizer,
7677
)
7778
from vllm.usage.usage_lib import UsageContext
78-
from vllm.utils import Counter, Device
7979
from vllm.utils.collection_utils import as_iter, is_list_of
80+
from vllm.utils.counter import Counter
8081
from vllm.v1.engine import EngineCoreRequest
8182
from vllm.v1.engine.llm_engine import LLMEngine
8283
from vllm.v1.sample.logits_processor import LogitsProcessor
@@ -1490,8 +1491,8 @@ def start_profile(self) -> None:
14901491
def stop_profile(self) -> None:
14911492
self.llm_engine.stop_profile()
14921493

1493-
def reset_prefix_cache(self, device: Device | None = None) -> bool:
1494-
return self.llm_engine.reset_prefix_cache(device)
1494+
def reset_prefix_cache(self, device: Device | None = None) -> None:
1495+
self.llm_engine.reset_prefix_cache(device)
14951496

14961497
def sleep(self, level: int = 1):
14971498
"""

vllm/entrypoints/openai/api_server.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
import vllm.envs as envs
4141
from vllm.config import VllmConfig
4242
from vllm.engine.arg_utils import AsyncEngineArgs
43-
from vllm.engine.protocol import EngineClient
43+
from vllm.engine.protocol import Device, EngineClient
4444
from vllm.entrypoints.launcher import serve_http
4545
from vllm.entrypoints.logger import RequestLogger
4646
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
@@ -108,7 +108,6 @@
108108
from vllm.logger import init_logger
109109
from vllm.reasoning import ReasoningParserManager
110110
from vllm.usage.usage_lib import UsageContext
111-
from vllm.utils import Device
112111
from vllm.utils.argparse_utils import FlexibleArgumentParser
113112
from vllm.utils.network_utils import is_valid_ipv6_address
114113
from vllm.utils.system_utils import decorate_logs, set_ulimit

vllm/entrypoints/openai/serving_models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from vllm.logger import init_logger
2020
from vllm.lora.request import LoRARequest
2121
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
22-
from vllm.utils import AtomicCounter
22+
from vllm.utils.counter import AtomicCounter
2323

2424
logger = init_logger(__name__)
2525

vllm/utils/__init__.py

Lines changed: 0 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33

44
import enum
55
import inspect
6-
import threading
76
import uuid
87
import warnings
98
from functools import wraps
@@ -68,54 +67,11 @@ def __dir__() -> list[str]:
6867
T = TypeVar("T")
6968

7069

71-
class Device(enum.Enum):
72-
GPU = enum.auto()
73-
CPU = enum.auto()
74-
75-
7670
class LayerBlockType(enum.Enum):
7771
attention = "attention"
7872
mamba = "mamba"
7973

8074

81-
class Counter:
82-
def __init__(self, start: int = 0) -> None:
83-
self.counter = start
84-
85-
def __next__(self) -> int:
86-
i = self.counter
87-
self.counter += 1
88-
return i
89-
90-
def reset(self) -> None:
91-
self.counter = 0
92-
93-
94-
class AtomicCounter:
95-
"""An atomic, thread-safe counter"""
96-
97-
def __init__(self, initial=0):
98-
"""Initialize a new atomic counter to given initial value"""
99-
self._value = initial
100-
self._lock = threading.Lock()
101-
102-
def inc(self, num=1):
103-
"""Atomically increment the counter by num and return the new value"""
104-
with self._lock:
105-
self._value += num
106-
return self._value
107-
108-
def dec(self, num=1):
109-
"""Atomically decrement the counter by num and return the new value"""
110-
with self._lock:
111-
self._value -= num
112-
return self._value
113-
114-
@property
115-
def value(self):
116-
return self._value
117-
118-
11975
def random_uuid() -> str:
12076
return str(uuid.uuid4().hex)
12177

vllm/utils/counter.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import threading
4+
5+
6+
class Counter:
7+
def __init__(self, start: int = 0) -> None:
8+
super().__init__()
9+
10+
self.counter = start
11+
12+
def __next__(self) -> int:
13+
i = self.counter
14+
self.counter += 1
15+
return i
16+
17+
def reset(self) -> None:
18+
self.counter = 0
19+
20+
21+
class AtomicCounter:
22+
"""An atomic, thread-safe counter"""
23+
24+
def __init__(self, initial: int = 0) -> None:
25+
"""Initialize a new atomic counter to given initial value"""
26+
super().__init__()
27+
28+
self._value = initial
29+
self._lock = threading.Lock()
30+
31+
@property
32+
def value(self) -> int:
33+
return self._value
34+
35+
def inc(self, num: int = 1) -> int:
36+
"""Atomically increment the counter by num and return the new value"""
37+
with self._lock:
38+
self._value += num
39+
return self._value
40+
41+
def dec(self, num: int = 1) -> int:
42+
"""Atomically decrement the counter by num and return the new value"""
43+
with self._lock:
44+
self._value -= num
45+
return self._value

vllm/v1/engine/async_llm.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
import vllm.envs as envs
1515
from vllm.config import VllmConfig
1616
from vllm.engine.arg_utils import AsyncEngineArgs
17-
from vllm.engine.protocol import EngineClient
17+
from vllm.engine.protocol import Device, EngineClient
1818
from vllm.entrypoints.utils import _validate_truncation_size
1919
from vllm.inputs import PromptType
2020
from vllm.logger import init_logger
@@ -29,7 +29,6 @@
2929
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
3030
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
3131
from vllm.usage.usage_lib import UsageContext
32-
from vllm.utils import Device
3332
from vllm.utils.async_utils import cancel_task_threadsafe
3433
from vllm.utils.collection_utils import as_list
3534
from vllm.utils.func_utils import deprecate_kwargs

vllm/v1/engine/llm_engine.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from vllm.distributed import stateless_destroy_torch_distributed_process_group
1515
from vllm.distributed.parallel_state import get_dp_group
1616
from vllm.engine.arg_utils import EngineArgs
17+
from vllm.engine.protocol import Device
1718
from vllm.inputs import PromptType
1819
from vllm.logger import init_logger
1920
from vllm.lora.request import LoRARequest
@@ -26,7 +27,6 @@
2627
from vllm.tracing import init_tracer
2728
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
2829
from vllm.usage.usage_lib import UsageContext
29-
from vllm.utils import Device
3030
from vllm.v1.engine import EngineCoreRequest
3131
from vllm.v1.engine.core_client import EngineCoreClient
3232
from vllm.v1.engine.output_processor import OutputProcessor

0 commit comments

Comments (0)