upgrade pytorch to 2.7 (#434)

guocuimi · web-flow · commit 3f772c70240f · 2025-04-23T13:33:18.000-07:00
diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
@@ -24,8 +24,8 @@ jobs:
       fail-fast: false
       matrix:
         python: ["3.9", "3.10", "3.11", "3.12"]
-        cuda: ["11.8", "12.4"]
-        torch: ["2.5.1", "2.6.0"]
+        cuda: ["11.8", "12.6", "12.8"]
+        torch: ["2.7.0"]
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/package_test.yml b/.github/workflows/package_test.yml
@@ -39,8 +39,8 @@ jobs:
       fail-fast: false
       matrix:
         python: ["3.12"]
-        cuda: ["12.4"]
-        torch: ["2.6.0"]
+        cuda: ["12.6"]
+        torch: ["2.7.0"]
     runs-on: [self-hosted, linux, build]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/publish_cpp_image.yml b/.github/workflows/publish_cpp_image.yml
@@ -48,25 +48,25 @@ jobs:
             vectorchai/scalellm-gateway:${{ inputs.tag }}
             vectorchai/scalellm-gateway:latest
 
-  publish_scalellm_cuda121:
+  publish_scalellm_cuda126:
     runs-on: [self-hosted, linux, release]
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
         with:
           submodules: recursive
 
-      - name: Build ScaleLLM for cuda 12.1
+      - name: Build ScaleLLM for cuda 12.6
         timeout-minutes: 60
         run: |
-          docker pull vectorchai/scalellm_devel:cuda12.1
+          docker pull vectorchai/scalellm_devel:cuda12.6
           docker run --rm -t \
             -v "$CI_CACHE_DIR":/ci_cache \
             -v "$GITHUB_WORKSPACE":/ScaleLLM \
             -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
             -e CCACHE_DIR=/ci_cache/.ccache \
             -u $(id -u):$(id -g) \
-            vectorchai/scalellm_devel:cuda12.1 \
+            vectorchai/scalellm_devel:cuda12.6 \
             bash /ScaleLLM/scripts/build_scalellm.sh
 
       - name: Set up QEMU
@@ -81,7 +81,7 @@ jobs:
           username: ${{ secrets.DOCKER_HUB_USER }}
           password: ${{ secrets.DOCKER_HUB_TOKEN }}
 
-      - name: Build and push scalellm docker image for cuda 12.1
+      - name: Build and push scalellm docker image for cuda 12.6
         uses: docker/build-push-action@v5
         with:
           context: .
@@ -136,47 +136,3 @@ jobs:
             vectorchai/scalellm_cu118:${{ inputs.tag }}
             vectorchai/scalellm_cu118:latest
 
-  publish_scalellm_cuda124:
-    runs-on: [self-hosted, linux, release]
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          submodules: recursive
-
-      - name: Build ScaleLLM for cuda 12.4
-        timeout-minutes: 60
-        run: |
-          docker pull vectorchai/scalellm_devel:cuda12.4
-          docker run --rm -t \
-            -v "$CI_CACHE_DIR":/ci_cache \
-            -v "$GITHUB_WORKSPACE":/ScaleLLM \
-            -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
-            -e CCACHE_DIR=/ci_cache/.ccache \
-            -u $(id -u):$(id -g) \
-            vectorchai/scalellm_devel:cuda12.4 \
-            bash /ScaleLLM/scripts/build_scalellm.sh
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_HUB_USER }}
-          password: ${{ secrets.DOCKER_HUB_TOKEN }}
-
-      - name: Build and push scalellm for cuda 12.4
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./docker/Dockerfile.scalellm
-          push: true
-          no-cache: true
-          tags: |
-            vectorchai/scalellm_cu124:${{ inputs.tag }}
-            vectorchai/scalellm_cu124:latest
-
diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cuda: ["12.4", "12.6", "12.8"]
+        cuda: ["12.6", "12.8"]
         gcc: ["12"]
         include: # build cuda 11.8 with gcc 11
           - cuda: "11.8"
diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cuda: ["11.8", "12.4", "12.6"]
+        cuda: ["11.8", "12.6", "12.8"]
     runs-on: [self-hosted, linux, build]
     steps:
       - name: Checkout repository
diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml
@@ -22,8 +22,8 @@ jobs:
     strategy:
       matrix:
         python: ["3.9", "3.10", "3.11", "3.12"]
-        cuda: ["12.4"]
-        torch: ["2.6.0"]
+        cuda: ["12.6"]
+        torch: ["2.7.0"]
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml
@@ -20,8 +20,8 @@ jobs:
       fail-fast: false
       matrix:
         python: ["3.9", "3.10", "3.11", "3.12"]
-        cuda: ["12.4"]
-        torch: ["2.6.0"]
+        cuda: ["12.6"]
+        torch: ["2.7.0"]
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.26)
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
 option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON)
-option(USE_CXX11_ABI "Use the new C++-11 ABI, which is not backwards compatible." ON)
 option(USE_MANYLINUX "Build for manylinux" OFF)
 
 option(BUILD_NVBENCH "Build the nvbench binary" OFF)
@@ -42,15 +41,6 @@ if(NOT CMAKE_BUILD_TYPE)
   )
 endif()
 
-# Convert the bool variable to integer.
-if(USE_CXX11_ABI)
-  set(USE_CXX11_ABI 1)
-  message(STATUS "Using the C++-11 ABI.")
-else()
-  set(USE_CXX11_ABI 0)
-  message(STATUS "Using the pre C++-11 ABI.")
-endif()
-
 if(USE_CCACHE)
   find_program(CCACHE_PROGRAM ccache)
   if(CCACHE_PROGRAM)
@@ -111,20 +101,11 @@ else()
     set(VCPKG_SOURCE_DIR ${FETCHCONTENT_BASE_DIR}/vcpkg-src)
   endif()
 
-  if (USE_CXX11_ABI)
-    FetchContent_Declare(vcpkg
-      GIT_REPOSITORY "https://github.com/microsoft/vcpkg.git"
-      GIT_TAG "2024.02.14"
-      SOURCE_DIR ${VCPKG_SOURCE_DIR}
-    )
-  else()
-    FetchContent_Declare(vcpkg
-      GIT_REPOSITORY "https://github.com/vectorch-ai/vcpkg.git"
-      GIT_TAG "ffc42e97c866ce9692f5c441394832b86548422c" # disable cxx11_abi
-      SOURCE_DIR ${VCPKG_SOURCE_DIR}
-    )
-    message(STATUS "Using custom vcpkg with cxx11_abi disabled")
-  endif()
+  FetchContent_Declare(vcpkg
+    GIT_REPOSITORY "https://github.com/microsoft/vcpkg.git"
+    GIT_TAG "2024.02.14"
+    SOURCE_DIR ${VCPKG_SOURCE_DIR}
+  )
   FetchContent_MakeAvailable(vcpkg)
 
   message(STATUS "Downloading and using vcpkg at ${vcpkg_SOURCE_DIR}")
@@ -179,12 +160,9 @@ endif()
 
 find_package(NCCL REQUIRED)
 
-if (USE_CXX11_ABI)
-  # only use jemalloc if using the new C++-11 ABI
-  find_package(Jemalloc)
-  if(Jemalloc_FOUND)
-    link_libraries(Jemalloc::jemalloc)
-  endif()
+find_package(Jemalloc)
+if(Jemalloc_FOUND)
+  link_libraries(Jemalloc::jemalloc)
 endif()
 
 # Important Note: Always invoke find_package for other dependencies
@@ -195,27 +173,15 @@ if (DEFINED ENV{LIBTORCH_ROOT})
   message(STATUS "Using libtorch at $ENV{LIBTORCH_ROOT}")
 else()
   include(FetchContent)
-  if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
-    # download libtorch 2.6.0 with cuda 12.4 from pytorch.org
-    if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip")
-    else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.6.0%2Bcu124.zip")
-    endif()
-  elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
-    # download libtorch 2.6.0 with cuda 12.1 from pytorch.org
-    if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu121.zip")
-    else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.6.0%2Bcu121.zip")
-    endif()
+  if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
+    # download libtorch 2.7.0 with cuda 12.8 from pytorch.org
+    set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu128/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu128.zip")
+  elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.6)
+    # download libtorch 2.7.0 with cuda 12.6 from pytorch.org
+    set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu126/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu126.zip")
   elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
-    # download libtorch 2.6.0 with cuda 11.8 from pytorch.org
-    if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu118.zip")
-    else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.6.0%2Bcu118.zip")
-    endif()
+    # download libtorch 2.7.0 with cuda 11.8 from pytorch.org
+    set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu118.zip")
   else()
     # error out if cuda version is not supported
     message(FATAL_ERROR "Unsupported CUDA version: ${CUDAToolkit_VERSION}")
@@ -234,18 +200,7 @@ else()
   FetchContent_MakeAvailable(libtorch)
   
   find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
-  message(STATUS "Downloading and using libtorch 2.6.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
-endif()
-
-# check if USE_CXX11_ABI is set correctly
-if (DEFINED USE_CXX11_ABI)
-  parse_make_options(${TORCH_CXX_FLAGS} "TORCH_CXX_FLAGS")
-  if(DEFINED TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI
-     AND NOT ${TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI} EQUAL ${USE_CXX11_ABI})
-      message(FATAL_ERROR
-          "The libtorch compilation options _GLIBCXX_USE_CXX11_ABI=${TORCH_CXX_FLAGS__GLIBCXX_USE_CXX11_ABI} "
-          "found by CMake conflict with the project setting USE_CXX11_ABI=${USE_CXX11_ABI}.")
-  endif()
+  message(STATUS "Downloading and using libtorch 2.7.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
 endif()
 
 # carry over torch flags to the rest of the project
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje
 
 .. code-block:: bash
 
-   # Install ScaleLLM with CUDA 12.4 and Pytorch 2.6.0
+   # Install ScaleLLM with CUDA 12.6 and PyTorch 2.7.0
    $ pip install -U scalellm
 
 
diff --git a/docs/source/quick_start.rst b/docs/source/quick_start.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje
 
 .. code-block:: bash
 
-    # Install ScaleLLM with CUDA 12.4 and Pytorch 2.5.1
+    # Install ScaleLLM with CUDA 12.6 and PyTorch 2.7.0
     $ pip install scalellm
 
 Install other versions
@@ -21,71 +21,35 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
 
 .. tabs::
 
-    .. tab:: CUDA 12.4
+    .. tab:: CUDA 12.8
 
         .. tabs::
 
-            .. tab:: PyTorch 2.6.0
+            .. tab:: PyTorch 2.7.0
 
                 .. code-block:: bash
 
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.6.0/
+                    $ pip install -U scalellm -i https://whl.vectorch.com/cu128/torch2.7.0/
 
-            .. tab:: PyTorch 2.5.1
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.5.1/
-
-            .. tab:: PyTorch 2.4.1
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu124/torch2.4.1/
-
-    .. tab:: CUDA 12.1
+    .. tab:: CUDA 12.6
 
         .. tabs::
 
-            .. tab:: PyTorch 2.6.0
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.6.0/
-
-            .. tab:: PyTorch 2.5.1
+            .. tab:: PyTorch 2.7.0
 
                 .. code-block:: bash
 
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.5.1/
-
-            .. tab:: PyTorch 2.4.1
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu121/torch2.4.1/
+                    $ pip install -U scalellm -i https://whl.vectorch.com/cu126/torch2.7.0/
 
     .. tab:: CUDA 11.8
 
         .. tabs::
 
-            .. tab:: PyTorch 2.6.0
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.6.0/
-
-            .. tab:: PyTorch 2.5.1
-
-                .. code-block:: bash
-
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.5.1/
-
-            .. tab:: PyTorch 2.4.1
+            .. tab:: PyTorch 2.7.0
 
                 .. code-block:: bash
 
-                    $ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.4.1/
+                    $ pip install -U scalellm -i https://whl.vectorch.com/cu118/torch2.7.0/
 
 
 Build from source
diff --git a/setup.py b/setup.py
@@ -15,15 +15,6 @@
 from setuptools.command.build_ext import build_ext
 
 
-def use_cxx11_abi():
-    try:
-        import torch
-
-        return torch._C._GLIBCXX_USE_CXX11_ABI
-    except ImportError:
-        return False
-
-
 def get_torch_root():
     try:
         import torch
@@ -186,12 +177,6 @@ def build_extension(self, ext: CMakeExtension):
         if "CMAKE_ARGS" in os.environ:
             cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]
 
-        # check if torch binary is built with cxx11 abi
-        if use_cxx11_abi():
-            cmake_args += ["-DUSE_CXX11_ABI=ON"]
-        else:
-            cmake_args += ["-DUSE_CXX11_ABI=OFF"]
-
         build_args = ["--config", build_type]
         max_jobs = os.getenv("MAX_JOBS", str(os.cpu_count()))
         build_args += ["-j" + max_jobs]
diff --git a/tools/run_in_docker.sh b/tools/run_in_docker.sh