Skip to content

Commit 5c46937

Browse files
authored
[Version] Bump version to v0.7.3 (ROCm#453)
* update vllm version to v0.7.3
  Signed-off-by: tjtanaa <[email protected]>
* fix linter
  Signed-off-by: tjtanaa <[email protected]>
* remove redundant code
  Signed-off-by: tjtanaa <[email protected]>
---------
Signed-off-by: tjtanaa <[email protected]>
1 parent d7fefdf commit 5c46937

File tree

8 files changed

+28
-7
lines changed

8 files changed

+28
-7
lines changed

benchmarks/kernels/benchmark_mixtral_moe_rocm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
import argparse
23
import json
34
import os

benchmarks/test_accuracy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
import argparse
23
import dataclasses
34

setup.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,9 +499,28 @@ def get_gaudi_sw_version():
499499

500500

501501
def get_vllm_version() -> str:
502-
version = get_version(
502+
# Get the version from setuptools_scm but override the base version
503+
base_version = os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION', '0.7.3')
504+
505+
# Get the full version with git information
506+
full_version = get_version(
503507
write_to="vllm/_version.py", # TODO: move this to pyproject.toml
504508
)
509+
510+
# Extract git information if available
511+
if '+' in full_version:
512+
# Extract the dev and git parts (e.g., from 0.x.y.devN+gHASH)
513+
git_parts = full_version.split('+', 1)[1]
514+
# Combine with our base version
515+
version = f"{base_version}.{full_version.split('+')[0].split('.')[-1]}+{git_parts}" # noqa: E501
516+
elif '.dev' in full_version:
517+
# Handle case where dev number is present but no git hash
518+
dev_part = full_version.split('.dev')[1]
519+
version = f"{base_version}.dev{dev_part}"
520+
else:
521+
# No git info available, just use the base version
522+
version = base_version
523+
505524
sep = "+" if "+" not in version else "." # dev versions might contain +
506525

507526
if _no_device():

tests/kernels/test_semi_structured.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
"""Tests for sparse cutlass kernels
23
34
Run `pytest tests/kernels/test_semi_structured.py`.

vllm/attention/ops/paged_attn_ater.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
from dataclasses import dataclass
23
from typing import List, Optional, Tuple
34

vllm/entrypoints/fast_sync_llm.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
import multiprocessing as mp
23
from queue import Empty
34
from typing import Union
@@ -85,8 +86,8 @@ def run_engine(self):
8586
try:
8687
while True:
8788
poll_interval -= 1
88-
if (self.input_queue.qsize() >=
89-
envs.VLLM_SYNC_SERVER_ACCUM_REQUESTS
89+
if (self.input_queue.qsize()
90+
>= envs.VLLM_SYNC_SERVER_ACCUM_REQUESTS
9091
or poll_interval <= 0
9192
or not self.llm_engine.has_unfinished_requests()):
9293
self._poll_requests()

vllm/entrypoints/sync_openai/api_server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# SPDX-License-Identifier: Apache-2.0
12
import asyncio
23
import multiprocessing
34
import re

vllm/model_executor/model_loader/loader.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,10 +1063,6 @@ def _load_weights(self, model_config: ModelConfig,
10631063
if hf_to_vllm_mapper := getattr(model, "hf_to_vllm_mapper", None):
10641064
self.weight_mapper = lambda name: hf_to_vllm_mapper._map_name(name)
10651065

1066-
# For some models like Molmo, we need to use hf_to_vllm_mapper
1067-
# to ensure correct loading of weights.
1068-
if hf_to_vllm_mapper := getattr(model, "hf_to_vllm_mapper", None):
1069-
self.weight_mapper = lambda name: hf_to_vllm_mapper._map_name(name)
10701066
# Modules whose weights might have fused on disk
10711067
# we need their output_sizes to make shard in flight correctly with TP
10721068
self.maybe_fused_weights_modules: Dict[str, List[int]] = {}

0 commit comments

Comments (0)