pyg-team · Looong01 · Aug 14, 2025 · Aug 14, 2025 · Aug 15, 2025 · Aug 15, 2025
diff --git a/.gitmodules b/.gitmodules
@@ -13,3 +13,9 @@
 [submodule "third_party/cccl"]
     path = third_party/cccl
     url = https://github.com/NVIDIA/cccl.git
+[submodule "third_party/hipCollections"]
+	path = third_party/hipCollections
+	url = https://github.com/ROCm/hipCollections.git
+[submodule "third_party/libhipcxx"]
+	path = third_party/libhipcxx
+	url = https://github.com/ROCm/libhipcxx.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added PyTorch 2.3 support ([#322](https://github.com/pyg-team/pyg-lib/pull/322))
 - Added Windows support ([#315](https://github.com/pyg-team/pyg-lib/pull/315))
 - Added macOS Apple Silicon support ([#310](https://github.com/pyg-team/pyg-lib/pull/310))
+- Added ROCm 6.4 support ([#507](https://github.com/pyg-team/pyg-lib/pull/507))
 ### Changed
 ### Removed
 - Removed Support for PyTorch 1.13-2.5 ([#532](https://github.com/pyg-team/pyg-lib/pull/532))

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,5 +1,18 @@
 cmake_minimum_required(VERSION 3.18)
-project(pyg)
+# ROCm builds compile C and C++ with hipcc, so the compilers are selected
+# before project() enables the languages. WITH_ROCM is read here ahead of its
+# option() declaration further down, so it is only true when it was supplied
+# on the command line or is already in the cache.
+if(WITH_ROCM)
+  # project() below enables the HIP language; check the CMake version first so
+  # an old CMake reports this message instead of failing inside project().
+  if(CMAKE_VERSION VERSION_LESS "3.23.1")
+    message(FATAL_ERROR "WITH_ROCM requires CMake >= 3.23.1 (now ${CMAKE_VERSION})")
+  endif()
+  # Use ROCM_PATH when it is set and non-empty, otherwise fall back to /opt/rocm.
+  if(DEFINED ENV{ROCM_PATH} AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
+    set(_ROCM_ROOT "$ENV{ROCM_PATH}")
+  else()
+    set(_ROCM_ROOT "/opt/rocm")
+  endif()
+  # NOTE(review): FORCE replaces any compiler the user passed explicitly;
+  # confirm that this is intended before relaxing it.
+  set(CMAKE_C_COMPILER "${_ROCM_ROOT}/bin/hipcc" CACHE FILEPATH "" FORCE)
+  set(CMAKE_CXX_COMPILER "${_ROCM_ROOT}/bin/hipcc" CACHE FILEPATH "" FORCE)
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  set(THREADS_PREFER_PTHREAD_FLAG ON)
+  project(pyg LANGUAGES C CXX HIP)
+else()
+  project(pyg)
+endif()
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
@@ -10,6 +23,10 @@ option(BUILD_TEST "Enable testing" OFF)
 option(BUILD_BENCHMARK "Enable benchmarks" OFF)
 option(WITH_COV "Enable code coverage" OFF)
 option(WITH_CUDA "Enable CUDA support" OFF)
+option(WITH_ROCM "Enable ROCm/HIP support" OFF)
+# Only one GPU backend can be built at a time; reject configurations that
+# request both CUDA and ROCm.
+if(WITH_CUDA AND WITH_ROCM)
+  message(FATAL_ERROR "WITH_ROCM and WITH_CUDA cannot both be ON")
+endif()
 
 if(NOT WIN32 AND NOT DEFINED USE_CXX11_ABI)
   find_package(Python3 COMPONENTS Interpreter REQUIRED)
@@ -75,11 +92,41 @@ if(WITH_CUDA)
   include_directories(${CUCOLLECTIONS_DIR})
 endif()
 
+# ROCm/HIP backend: enable the HIP language, locate the required ROCm
+# packages, and wire up the libhipcxx and hipCollections trees that live under
+# third_party/.
+if (WITH_ROCM)
+  if (CMAKE_VERSION VERSION_LESS "3.23.1")
+    message(FATAL_ERROR "WITH_ROCM requires CMake >= 3.23.1 (now ${CMAKE_VERSION})")
+  endif()
+  enable_language(HIP)
+  add_definitions(-DWITH_ROCM)
+  find_package(Threads REQUIRED)
+  find_package(hip REQUIRED)
+  find_package(hipblaslt REQUIRED)
+  find_package(rocblas REQUIRED)
+  find_package(rocprim REQUIRED)
+  find_package(rocthrust REQUIRED)
+  find_package(composable_kernel REQUIRED)
+  set(LIBHIPCXX_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/libhipcxx")
+  set(HIP_COLLECTIONS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/hipCollections/include")
+  # include_directories() accepts a missing directory without complaint, so an
+  # uninitialised submodule would otherwise only show up later as missing
+  # headers during compilation. Fail at configure time with guidance instead.
+  if(NOT IS_DIRECTORY "${HIP_COLLECTIONS_DIR}")
+    message(FATAL_ERROR
+      "hipCollections headers not found at ${HIP_COLLECTIONS_DIR}; "
+      "run `git submodule update --init --recursive`")
+  endif()
+  # libhipcxx is taken only from the third_party tree (NO_DEFAULT_PATH).
+  find_package(libhipcxx REQUIRED CONFIG  PATHS "${LIBHIPCXX_DIR}/lib/cmake/libhipcxx"  NO_DEFAULT_PATH)
+  include_directories("${HIP_COLLECTIONS_DIR}")
+endif()
+
 set(CSRC pyg_lib/csrc)
 file(GLOB_RECURSE ALL_SOURCES ${CSRC}/*.cpp)
 if (WITH_CUDA)
   file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.cu)
 endif()
+if(WITH_ROCM)
+  # Re-glob the sources collected so far together with every .hip file
+  # found under ${CSRC}.
+  file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.hip)
+  # The CK half-precision math helpers rely on implicit half
+  # conversions/operators, while PyTorch's ROCm compile flags may define
+  # __HIP_NO_HALF_* globally. Undefine those macros for the CK matmul
+  # translation unit only.
+  set(PYG_ROCM_MATMUL_HIP "${CMAKE_CURRENT_SOURCE_DIR}/${CSRC}/ops/rocm/matmul_kernel.hip")
+  if(EXISTS "${PYG_ROCM_MATMUL_HIP}")
+    set_property(SOURCE "${PYG_ROCM_MATMUL_HIP}" PROPERTY COMPILE_OPTIONS
+      -U__HIP_NO_HALF_OPERATORS__
+      -U__HIP_NO_HALF_CONVERSIONS__)
+  endif()
+endif()
 add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES})
 target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
 if(MKL_INCLUDE_FOUND)
@@ -110,6 +157,16 @@ if (NOT MSVC)
   include_directories(${GKLIB_PATH})
   include_directories("${METIS_DIR}/include")
   add_subdirectory("${METIS_DIR}/libmetis")
+
+  if(WITH_ROCM)
+    # Request position-independent code for the METIS targets, when they
+    # exist, under ROCm builds.
+    foreach(_pyg_pic_target IN ITEMS metis GKlib)
+      if(TARGET ${_pyg_pic_target})
+        set_target_properties(${_pyg_pic_target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+      endif()
+    endforeach()
+  endif()
+
   target_link_libraries(${PROJECT_NAME} PRIVATE metis)
 endif()
 
@@ -129,6 +186,25 @@ if(WITH_CUDA)
                              third_party/cccl/libcudacxx/include)
 endif()
 
+# Link the ROCm libraries. Composable Kernel's device_gemm_operations target
+# is linked only when it exists under one of the two known namespaces;
+# otherwise a warning is issued and nothing extra is linked.
+if(WITH_ROCM)
+  target_link_libraries(${PROJECT_NAME}
+    PRIVATE
+      Threads::Threads
+      hip::device
+      roc::hipblaslt
+      roc::rocblas
+      roc::rocthrust
+      libhipcxx::libhipcxx
+  )
+  if(TARGET composable_kernel::device_gemm_operations)
+    target_link_libraries(${PROJECT_NAME}
+      PRIVATE composable_kernel::device_gemm_operations)
+  elseif(TARGET composablekernels::device_gemm_operations)
+    target_link_libraries(${PROJECT_NAME}
+      PRIVATE composablekernels::device_gemm_operations)
+  else()
+    message(WARNING "composable_kernel::device_gemm_operations target not found; building with CK headers only")
+  endif()
+endif()
+
 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
 
 if(BUILD_TEST)

diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@
 
 We provide pre-built Python wheels for all major OS/PyTorch/CUDA combinations from Python 3.10 till 3.13, see [here](https://data.pyg.org/whl).
 
-To install the wheels, simply run
+To install the wheels for the CPU or CUDA backend, simply run
 
 ```
 pip install pyg-lib -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
@@ -53,6 +53,93 @@ The following combinations are supported:
 | **Windows**  | ✅    | ✅      | ✅      | ✅      |       |
 | **macOS**    | ✅    |         |         |        |        |
 
+For the ROCm backend, the external [`pyg-rocm-build` repository](https://github.com/Looong01/pyg-rocm-build) provides wheels and detailed instructions on how to install PyG for ROCm.
+If you have any questions about it, please open an issue [here](https://github.com/Looong01/pyg-rocm-build/issues).
+
+**Note:** The ROCm backend currently supports Linux only.
+
+### Build from source on a ROCm machine (Linux)
+
+The following steps build and install `pyg-lib` with ROCm/HIP support from source.
+Ensure your ROCm installation includes `hipblaslt`, `rocblas`, `rocprim`,
+`rocthrust`, and `composable_kernel`.
+
+1. Install system build tools:
+
+```bash
+sudo apt update
+sudo apt install -y build-essential python3-dev python3-pip cmake ninja-build
+```
+
+2. Install Python build dependencies:
+
+```bash
+python3 -m pip install --upgrade pip setuptools wheel ninja
+```
+
+3. Install a ROCm-enabled PyTorch build (matching your ROCm stack):
+
+```bash
+# Example:
+# python3 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.3
+```
+
+4. Configure environment variables:
+
+```bash
+export ROCM_PATH=/opt/rocm
+export CMAKE_PREFIX_PATH="${ROCM_PATH};${ROCM_PATH}/lib/cmake"
+export FORCE_ROCM=1
+export FORCE_CUDA=0
+
+# Set your GPU architecture, for example gfx90a/gfx942/gfx1100:
+export PYTORCH_ROCM_ARCH="gfx1100;gfx950;gfx942;gfx90a;gfx908;gfx1201;gfx1101;gfx1030"
+# Alternatively, you can use:
+# export AMDGPU_TARGETS="gfx1100;gfx950;gfx942;gfx90a;gfx908;gfx1201;gfx1101;gfx1030"
+# If your hipcc does not recognize one of the targets, remove that target.
+
+# Optional: disable CK grouped matmul path (enabled by default).
+# export PYG_ROCM_MATMUL_USE_CK=0
+# Optional: require CK path (fail fast if fallback would happen).
+# export PYG_ROCM_MATMUL_REQUIRE_CK=1
+```
+
+`grouped_matmul` / `segment_matmul` behavior on ROCm:
+
+- **Important:** The CK backend in `pyg-lib` only provides native kernels for
+  `bf16` and `fp16`.
+- `fp16` input: use CK FP16 grouped GEMM path.
+- `bf16` input: use CK BF16 grouped GEMM path.
+- `fp32` input: CK does not run native FP32 kernels. `pyg-lib` first converts
+  to `bf16` and tries CK BF16, then converts to `fp16` and tries CK FP16.
+- Since `fp32` uses reduced-precision conversion on the CK path, numerical
+  differences at `bf16/fp16` precision are expected.
+- `PYG_ROCM_MATMUL_USE_CK=0`: disable CK grouped matmul and use ATen matmul.
+- `PYG_ROCM_MATMUL_REQUIRE_CK=1`: strict mode. If no CK path is accepted, an
+  error is raised instead of falling back.
+- Without strict mode, unsupported CK shapes/targets fall back to `at::mm_out`
+  with a warning that includes the reason.
+- On architectures without CK XDL support for the selected path (for example
+  some `gfx10` targets), fallback warnings are expected.
+
+5. Build and install:
+
+```bash
+python3 -m pip install -v .
+```
+
+For editable/development install:
+
+```bash
+python3 -m pip install -v -e .
+```
+
+Optional check:
+
+```bash
+python3 -c "import torch; print(torch.version.hip)"
+```
+
 ### From nightly
 
 Nightly wheels are provided for Linux from Python 3.10 till 3.13:

diff --git a/benchmark/sampler/neighbor.py b/benchmark/sampler/neighbor.py
@@ -38,6 +38,11 @@
                        default=['pyg-lib', 'torch-sparse', 'dgl'])
 args = argparser.parse_args()
 
+# Fail fast: the pyg-lib sampler cannot be benchmarked with directed=False.
+if 'pyg-lib' in args.libraries and not args.directed:
+    raise ValueError("pyg-lib neighbor sampling does not support directed=False. "
+                     "Run with --directed, or remove pyg-lib from --libraries.")
+
 
 @withSeed
 @withDataset('DIMACS10', 'citationCiteseer')