Skip to content

Commit fd58b73

Browse files
authored
Build CUDA11.8 wheels for release (#1596)
1 parent 8efe23f commit fd58b73

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
os: ['ubuntu-20.04']
5151
python-version: ['3.8', '3.9', '3.10', '3.11']
5252
pytorch-version: ['2.1.0']
53-
cuda-version: ['12.1']
53+
cuda-version: ['11.8', '12.1']
5454

5555
steps:
5656
- name: Checkout

setup.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
ROOT_DIR = os.path.dirname(__file__)
1414

15+
MAIN_CUDA_VERSION = "12.1"
16+
1517
# Supported NVIDIA GPU architectures.
1618
SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
1719

@@ -225,7 +227,7 @@ def get_path(*filepath) -> str:
225227
return os.path.join(ROOT_DIR, *filepath)
226228

227229

228-
def find_version(filepath: str):
230+
def find_version(filepath: str) -> str:
229231
"""Extract version information from the given filepath.
230232
231233
Adapted from https://github.com/ray-project/ray/blob/0b190ee1160eeca9796bc091e07eaebf4c85b511/python/setup.py
@@ -238,6 +240,15 @@ def find_version(filepath: str):
238240
raise RuntimeError("Unable to find version string.")
239241

240242

243+
def get_vllm_version() -> str:
    """Return the vLLM version string, tagged with the CUDA build if needed.

    The base version is extracted from ``vllm/__init__.py`` via
    ``find_version``. If the CUDA version reported by ``nvcc_cuda_version``
    differs from ``MAIN_CUDA_VERSION``, a ``+cu<digits>`` suffix is appended
    so that wheels built against a non-default CUDA toolkit carry a distinct
    version (e.g. ``+cu118`` for CUDA 11.8).
    """
    base_version = find_version(get_path("vllm", "__init__.py"))
    nvcc_version = str(nvcc_cuda_version)

    # Builds against the main CUDA version keep the plain version string.
    if nvcc_version == MAIN_CUDA_VERSION:
        return base_version

    # Strip the dots and keep at most three characters: "11.8" -> "118".
    cuda_tag = nvcc_version.replace(".", "")[:3]
    return base_version + f"+cu{cuda_tag}"
250+
251+
241252
def read_readme() -> str:
242253
"""Read the README file if present."""
243254
p = get_path("README.md")
@@ -256,7 +267,7 @@ def get_requirements() -> List[str]:
256267

257268
setuptools.setup(
258269
name="vllm",
259-
version=find_version(get_path("vllm", "__init__.py")),
270+
version=get_vllm_version(),
260271
author="vLLM Team",
261272
license="Apache 2.0",
262273
description=("A high-throughput and memory-efficient inference and "

0 commit comments

Comments
 (0)