Skip to content

Commit 3f772c7

Browse files
authored
upgrade pytorch to 2.7 (#434)
1 parent cfc8c75 commit 3f772c7

File tree

12 files changed

+43
-183
lines changed

12 files changed

+43
-183
lines changed

.github/workflows/build_wheel.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ jobs:
2424
fail-fast: false
2525
matrix:
2626
python: ["3.9", "3.10", "3.11", "3.12"]
27-
cuda: ["11.8", "12.4"]
28-
torch: ["2.5.1", "2.6.0"]
27+
cuda: ["11.8", "12.6", "12.8"]
28+
torch: ["2.7.0"]
2929
runs-on: [self-hosted, linux, release]
3030
env:
3131
PYTHON_VERSION: ${{ matrix.python }}

.github/workflows/package_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ jobs:
3939
fail-fast: false
4040
matrix:
4141
python: ["3.12"]
42-
cuda: ["12.4"]
43-
torch: ["2.6.0"]
42+
cuda: ["12.6"]
43+
torch: ["2.7.0"]
4444
runs-on: [self-hosted, linux, build]
4545
env:
4646
PYTHON_VERSION: ${{ matrix.python }}

.github/workflows/publish_cpp_image.yml

Lines changed: 5 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -48,25 +48,25 @@ jobs:
4848
vectorchai/scalellm-gateway:${{ inputs.tag }}
4949
vectorchai/scalellm-gateway:latest
5050
51-
publish_scalellm_cuda121:
51+
publish_scalellm_cuda126:
5252
runs-on: [self-hosted, linux, release]
5353
steps:
5454
- name: Checkout repository
5555
uses: actions/checkout@v4
5656
with:
5757
submodules: recursive
5858

59-
- name: Build ScaleLLM for cuda 12.1
59+
- name: Build ScaleLLM for cuda 12.6
6060
timeout-minutes: 60
6161
run: |
62-
docker pull vectorchai/scalellm_devel:cuda12.1
62+
docker pull vectorchai/scalellm_devel:cuda12.6
6363
docker run --rm -t \
6464
-v "$CI_CACHE_DIR":/ci_cache \
6565
-v "$GITHUB_WORKSPACE":/ScaleLLM \
6666
-e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
6767
-e CCACHE_DIR=/ci_cache/.ccache \
6868
-u $(id -u):$(id -g) \
69-
vectorchai/scalellm_devel:cuda12.1 \
69+
vectorchai/scalellm_devel:cuda12.6 \
7070
bash /ScaleLLM/scripts/build_scalellm.sh
7171
7272
- name: Set up QEMU
@@ -81,7 +81,7 @@ jobs:
8181
username: ${{ secrets.DOCKER_HUB_USER }}
8282
password: ${{ secrets.DOCKER_HUB_TOKEN }}
8383

84-
- name: Build and push scalellm docker image for cuda 12.1
84+
- name: Build and push scalellm docker image for cuda 12.6
8585
uses: docker/build-push-action@v5
8686
with:
8787
context: .
@@ -136,47 +136,3 @@ jobs:
136136
vectorchai/scalellm_cu118:${{ inputs.tag }}
137137
vectorchai/scalellm_cu118:latest
138138
139-
publish_scalellm_cuda124:
140-
runs-on: [self-hosted, linux, release]
141-
steps:
142-
- name: Checkout repository
143-
uses: actions/checkout@v4
144-
with:
145-
submodules: recursive
146-
147-
- name: Build ScaleLLM for cuda 12.4
148-
timeout-minutes: 60
149-
run: |
150-
docker pull vectorchai/scalellm_devel:cuda12.4
151-
docker run --rm -t \
152-
-v "$CI_CACHE_DIR":/ci_cache \
153-
-v "$GITHUB_WORKSPACE":/ScaleLLM \
154-
-e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
155-
-e CCACHE_DIR=/ci_cache/.ccache \
156-
-u $(id -u):$(id -g) \
157-
vectorchai/scalellm_devel:cuda12.4 \
158-
bash /ScaleLLM/scripts/build_scalellm.sh
159-
160-
- name: Set up QEMU
161-
uses: docker/setup-qemu-action@v3
162-
163-
- name: Set up Docker Buildx
164-
uses: docker/setup-buildx-action@v3
165-
166-
- name: Login to Docker Hub
167-
uses: docker/login-action@v3
168-
with:
169-
username: ${{ secrets.DOCKER_HUB_USER }}
170-
password: ${{ secrets.DOCKER_HUB_TOKEN }}
171-
172-
- name: Build and push scalellm for cuda 12.4
173-
uses: docker/build-push-action@v5
174-
with:
175-
context: .
176-
file: ./docker/Dockerfile.scalellm
177-
push: true
178-
no-cache: true
179-
tags: |
180-
vectorchai/scalellm_cu124:${{ inputs.tag }}
181-
vectorchai/scalellm_cu124:latest
182-

.github/workflows/publish_devel_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
cuda: ["12.4", "12.6", "12.8"]
24+
cuda: ["12.6", "12.8"]
2525
gcc: ["12"]
2626
include: # build cuda 11.8 with gcc 11
2727
- cuda: "11.8"

.github/workflows/publish_manylinux_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
cuda: ["11.8", "12.4", "12.6"]
24+
cuda: ["11.8", "12.6", "12.8"]
2525
runs-on: [self-hosted, linux, build]
2626
steps:
2727
- name: Checkout repository

.github/workflows/publish_wheel.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ jobs:
2222
strategy:
2323
matrix:
2424
python: ["3.9", "3.10", "3.11", "3.12"]
25-
cuda: ["12.4"]
26-
torch: ["2.6.0"]
25+
cuda: ["12.6"]
26+
torch: ["2.7.0"]
2727
runs-on: [self-hosted, linux, release]
2828
env:
2929
PYTHON_VERSION: ${{ matrix.python }}

.github/workflows/release_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ jobs:
2020
fail-fast: false
2121
matrix:
2222
python: ["3.9", "3.10", "3.11", "3.12"]
23-
cuda: ["12.4"]
24-
torch: ["2.6.0"]
23+
cuda: ["12.6"]
24+
torch: ["2.7.0"]
2525
runs-on: [self-hosted, linux, release]
2626
env:
2727
PYTHON_VERSION: ${{ matrix.python }}

CMakeLists.txt

Lines changed: 17 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.26)
22
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
33

44
option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON)
5-
option(USE_CXX11_ABI "Use the new C++-11 ABI, which is not backwards compatible." ON)
65
option(USE_MANYLINUX "Build for manylinux" OFF)
76

87
option(BUILD_NVBENCH "Build the nvbench binary" OFF)
@@ -42,15 +41,6 @@ if(NOT CMAKE_BUILD_TYPE)
4241
)
4342
endif()
4443

45-
# Convert the bool variable to integer.
46-
if(USE_CXX11_ABI)
47-
set(USE_CXX11_ABI 1)
48-
message(STATUS "Using the C++-11 ABI.")
49-
else()
50-
set(USE_CXX11_ABI 0)
51-
message(STATUS "Using the pre C++-11 ABI.")
52-
endif()
53-
5444
if(USE_CCACHE)
5545
find_program(CCACHE_PROGRAM ccache)
5646
if(CCACHE_PROGRAM)
@@ -111,20 +101,11 @@ else()
111101
set(VCPKG_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/vcpkg-src)
112102
endif()
113103

114-
if (USE_CXX11_ABI)
115-
FetchContent_Declare(vcpkg
116-
GIT_REPOSITORY "https://github.com/microsoft/vcpkg.git"
117-
GIT_TAG "2024.02.14"
118-
SOURCE_DIR ${VCPKG_SOURCE_DIR}
119-
)
120-
else()
121-
FetchContent_Declare(vcpkg
122-
GIT_REPOSITORY "https://github.com/vectorch-ai/vcpkg.git"
123-
GIT_TAG "ffc42e97c866ce9692f5c441394832b86548422c" # disable cxx11_abi
124-
SOURCE_DIR ${VCPKG_SOURCE_DIR}
125-
)
126-
message(STATUS "Using custom vcpkg with cxx11_abi disabled")
127-
endif()
104+
FetchContent_Declare(vcpkg
105+
GIT_REPOSITORY "https://github.com/microsoft/vcpkg.git"
106+
GIT_TAG "2024.02.14"
107+
SOURCE_DIR ${VCPKG_SOURCE_DIR}
108+
)
128109
FetchContent_MakeAvailable(vcpkg)
129110

130111
message(STATUS "Downloading and using vcpkg at ${vcpkg_SOURCE_DIR}")
@@ -179,12 +160,9 @@ endif()
179160

180161
find_package(NCCL REQUIRED)
181162

182-
if (USE_CXX11_ABI)
183-
# only use jemalloc if using the new C++-11 ABI
184-
find_package(Jemalloc)
185-
if(Jemalloc_FOUND)
186-
link_libraries(Jemalloc::jemalloc)
187-
endif()
163+
find_package(Jemalloc)
164+
if(Jemalloc_FOUND)
165+
link_libraries(Jemalloc::jemalloc)
188166
endif()
189167

190168
# Important Note: Always invoke find_package for other dependencies
@@ -195,27 +173,15 @@ if (DEFINED ENV{LIBTORCH_ROOT})
195173
message(STATUS "Using libtorch at $ENV{LIBTORCH_ROOT}")
196174
else()
197175
include(FetchContent)
198-
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
199-
# download libtorch 2.6.0 with cuda 12.4 from pytorch.org
200-
if (USE_CXX11_ABI)
201-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip")
202-
else()
203-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.6.0%2Bcu124.zip")
204-
endif()
205-
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
206-
# download libtorch 2.6.0 with cuda 12.1 from pytorch.org
207-
if (USE_CXX11_ABI)
208-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu121.zip")
209-
else()
210-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.6.0%2Bcu121.zip")
211-
endif()
176+
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
177+
# download libtorch 2.7.0 with cuda 12.8 from pytorch.org
178+
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu128/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu128.zip")
179+
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.6)
180+
# download libtorch 2.7.0 with cuda 12.6 from pytorch.org
181+
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu126/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu126.zip")
212182
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
213-
# download libtorch 2.6.0 with cuda 11.8 from pytorch.org
214-
if (USE_CXX11_ABI)
215-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu118.zip")
216-
else()
217-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.6.0%2Bcu118.zip")
218-
endif()
183+
# download libtorch 2.7.0 with cuda 11.8 from pytorch.org
184+
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu118.zip")
219185
else()
220186
# error out if cuda version is not supported
221187
message(FATAL_ERROR "Unsupported CUDA version: ${CUDAToolkit_VERSION}")
@@ -234,18 +200,7 @@ else()
234200
FetchContent_MakeAvailable(libtorch)
235201

236202
find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
237-
message(STATUS "Downloading and using libtorch 2.6.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
238-
endif()
239-
240-
# check if USE_CXX11_ABI is set correctly
241-
if (DEFINED USE_CXX11_ABI)
242-
parse_make_options(${TORCH_CXX_FLAGS} "TORCH_CXX_FLAGS")
243-
if(DEFINED TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI
244-
AND NOT ${TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI} EQUAL ${USE_CXX11_ABI})
245-
message(FATAL_ERROR
246-
"The libtorch compilation options _GLIBCXX_USE_CXX11_ABI=${TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI} "
247-
"found by CMake conflict with the project setting USE_CXX11_ABI=${USE_CXX11_ABI}.")
248-
endif()
203+
message(STATUS "Downloading and using libtorch 2.7.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
249204
endif()
250205

251206
# carry over torch flags to the rest of the project

docs/source/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje
1212

1313
.. code-block:: bash
1414
15-
# Install ScaleLLM with CUDA 12.4 and Pytorch 2.6.0
15+
# Install ScaleLLM with CUDA 12.6 and PyTorch 2.7.0
1616
$ pip install -U scalellm
1717
1818

docs/source/quick_start.rst

Lines changed: 9 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje
1212

1313
.. code-block:: bash
1414
15-
# Install ScaleLLM with CUDA 12.4 and Pytorch 2.5.1
15+
# Install ScaleLLM with CUDA 12.6 and PyTorch 2.7.0
1616
$ pip install scalellm
1717
1818
Install other versions
@@ -21,71 +21,35 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
2121

2222
.. tabs::
2323

24-
.. tab:: CUDA 12.4
24+
.. tab:: CUDA 12.8
2525

2626
.. tabs::
2727

28-
.. tab:: PyTorch 2.6.0
28+
.. tab:: PyTorch 2.7.0
2929

3030
.. code-block:: bash
3131
32-
$ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.6.0/
32+
$ pip install -U scalellm -i https://whl.vectorch.com/cu128/torch2.7.0/
3333
34-
.. tab:: PyTorch 2.5.1
35-
36-
.. code-block:: bash
37-
38-
$ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.5.1/
39-
40-
.. tab:: PyTorch 2.4.1
41-
42-
.. code-block:: bash
43-
44-
$ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.4.1/
45-
46-
.. tab:: CUDA 12.1
34+
.. tab:: CUDA 12.6
4735

4836
.. tabs::
4937

50-
.. tab:: PyTorch 2.6.0
51-
52-
.. code-block:: bash
53-
54-
$ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.6.0/
55-
56-
.. tab:: PyTorch 2.5.1
38+
.. tab:: PyTorch 2.7.0
5739

5840
.. code-block:: bash
5941
60-
$ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.5.1/
61-
62-
.. tab:: PyTorch 2.4.1
63-
64-
.. code-block:: bash
65-
66-
$ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.4.1/
42+
$ pip install -U scalellm -i https://whl.vectorch.com/cu126/torch2.7.0/
6743
6844
.. tab:: CUDA 11.8
6945

7046
.. tabs::
7147

72-
.. tab:: PyTorch 2.6.0
73-
74-
.. code-block:: bash
75-
76-
$ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.6.0/
77-
78-
.. tab:: PyTorch 2.5.1
79-
80-
.. code-block:: bash
81-
82-
$ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.5.1/
83-
84-
.. tab:: PyTorch 2.4.1
48+
.. tab:: PyTorch 2.7.0
8549

8650
.. code-block:: bash
8751
88-
$ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.4.1/
52+
$ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.7.0/
8953
9054
9155
Build from source

0 commit comments

Comments
 (0)