Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
be096a2
Add hipCollections submodule
Looong01 Aug 14, 2025
279114e
Add rocm backend
Looong01 Aug 14, 2025
2ee509f
Fix bugs
Looong01 Aug 15, 2025
421c4b2
Fix bugs
Looong01 Aug 15, 2025
ac08f41
Update Readme
Looong01 Aug 15, 2025
9561e89
Update comments
Looong01 Aug 15, 2025
1e09787
Check again
Looong01 Aug 15, 2025
26df7d3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 15, 2025
06df3f2
Update changlog for rocm
Looong01 Aug 15, 2025
27fd9e0
Add external repo for rocm backend
Looong01 Aug 15, 2025
8747ff8
[ROCm] import third_party/libhipcxx automatically
Looong01 Aug 15, 2025
37449b1
Fix bugs
Looong01 Aug 19, 2025
b2f1dc4
Merge branch 'master' into master
akihironitta Sep 20, 2025
d199339
Initial plan
Copilot Dec 21, 2025
2c43f97
Implement matmul_kernel using Composable Kernel (CK) for ROCm
Copilot Dec 21, 2025
907421d
Fix StreamConfig namespace - use ck::StreamConfig
Copilot Dec 21, 2025
df09ba4
Add comment about kernel parameters following CUDA CUTLASS pattern
Copilot Dec 21, 2025
b8108b1
Update matmul_kernel_ck.hip to use official AMD CK API with GemmDesc
Copilot Dec 21, 2025
e8526d6
Merge pull request #1 from Looong01/copilot/implement-matmul-composab…
Looong01 Dec 21, 2025
ae9c19e
Upgrade CK implement
Feb 13, 2026
b65320b
update readme
Feb 13, 2026
8a08ccf
update gitmodules
Feb 13, 2026
b85de78
Remove hipCollections and libhipcxx submodules
Feb 13, 2026
deeb1c3
Add hipCollections and libhipcxx as submodules
Feb 13, 2026
9352cd5
Upgrade CK backend and fix bugs
Feb 14, 2026
b4aa59d
Fix bugs
Feb 14, 2026
474f6a8
Update README
Feb 14, 2026
e894271
Resolve conflicts
Feb 14, 2026
314e880
Resolve conflicts
Feb 14, 2026
69207c8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2026
c81fa1b
Merge branch 'master' into master
Looong01 Feb 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@
[submodule "third_party/cccl"]
path = third_party/cccl
url = https://github.com/NVIDIA/cccl.git
[submodule "third_party/hipCollections"]
path = third_party/hipCollections
url = https://github.com/ROCm/hipCollections.git
[submodule "third_party/libhipcxx"]
path = third_party/libhipcxx
url = https://github.com/ROCm/libhipcxx.git
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added PyTorch 2.3 support ([#322](https://github.com/pyg-team/pyg-lib/pull/322))
- Added Windows support ([#315](https://github.com/pyg-team/pyg-lib/pull/315))
- Added macOS Apple Silicon support ([#310](https://github.com/pyg-team/pyg-lib/pull/310))
- Added ROCm 6.4 support ([#507](https://github.com/pyg-team/pyg-lib/pull/507))
### Changed
### Removed
- Removed Support for PyTorch 1.13-2.5 ([#532](https://github.com/pyg-team/pyg-lib/pull/532))
Expand Down
78 changes: 77 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
cmake_minimum_required(VERSION 3.18)
project(pyg)
# Declare the project. For ROCm builds the toolchain must be selected *before*
# project(), because project() is where CMake detects and enables compilers.
if(WITH_ROCM)
  # project() below enables the HIP language. Check the CMake version first so
  # that an outdated CMake stops with a clear message instead of failing
  # inside project() while trying to enable HIP.
  if(CMAKE_VERSION VERSION_LESS "3.23.1")
    message(FATAL_ERROR "WITH_ROCM requires CMake >= 3.23.1 (now ${CMAKE_VERSION})")
  endif()

  # Locate the ROCm installation: a non-empty ROCM_PATH environment variable
  # wins, otherwise fall back to /opt/rocm.
  if(DEFINED ENV{ROCM_PATH} AND NOT "$ENV{ROCM_PATH}" STREQUAL "")
    set(_ROCM_ROOT "$ENV{ROCM_PATH}")
  else()
    set(_ROCM_ROOT "/opt/rocm")
  endif()

  # Use hipcc for both C and C++ sources.
  # NOTE: FORCE overwrites any compiler already stored in the cache, including
  # one passed on the command line with -DCMAKE_<LANG>_COMPILER=...
  set(CMAKE_C_COMPILER "${_ROCM_ROOT}/bin/hipcc" CACHE FILEPATH "" FORCE)
  set(CMAKE_CXX_COMPILER "${_ROCM_ROOT}/bin/hipcc" CACHE FILEPATH "" FORCE)

  # Default every target created afterwards to position-independent code.
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  # Ask FindThreads to prefer the -pthread flag when it is available.
  set(THREADS_PREFER_PTHREAD_FLAG ON)

  project(pyg LANGUAGES C CXX HIP)
else()
  project(pyg)
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
Expand All @@ -10,6 +23,10 @@ option(BUILD_TEST "Enable testing" OFF)
option(BUILD_BENCHMARK "Enable benchmarks" OFF)
option(WITH_COV "Enable code coverage" OFF)
option(WITH_CUDA "Enable CUDA support" OFF)
# Opt-in switch for the ROCm/HIP backend; disabled unless requested.
option(WITH_ROCM "Enable ROCm/HIP support" OFF)

# The CUDA and ROCm backends are mutually exclusive: abort configuration when
# both switches are turned on.
if(WITH_CUDA AND WITH_ROCM)
  message(FATAL_ERROR "WITH_ROCM and WITH_CUDA cannot both be ON")
endif()

if(NOT WIN32 AND NOT DEFINED USE_CXX11_ABI)
find_package(Python3 COMPONENTS Interpreter REQUIRED)
Expand Down Expand Up @@ -75,11 +92,41 @@ if(WITH_CUDA)
include_directories(${CUCOLLECTIONS_DIR})
endif()

# ROCm backend: enable the HIP language and locate every required package.
if(WITH_ROCM)
  # Refuse to configure with a CMake older than the version this backend needs.
  if(CMAKE_VERSION VERSION_LESS "3.23.1")
    message(FATAL_ERROR "WITH_ROCM requires CMake >= 3.23.1 (now ${CMAKE_VERSION})")
  endif()

  enable_language(HIP)
  # Expose WITH_ROCM to the preprocessor for targets in this directory.
  add_definitions(-DWITH_ROCM)

  find_package(Threads REQUIRED)

  # ROCm packages that must be discoverable (e.g. through CMAKE_PREFIX_PATH);
  # each one is mandatory, so configuration stops at the first one missing.
  foreach(_pyg_rocm_pkg IN ITEMS
      hip
      hipblaslt
      rocblas
      rocprim
      rocthrust
      composable_kernel)
    find_package(${_pyg_rocm_pkg} REQUIRED)
  endforeach()

  # Vendored dependencies living under third_party/.
  set(LIBHIPCXX_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party/libhipcxx")
  set(HIP_COLLECTIONS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/hipCollections/include)

  # Only the vendored libhipcxx is accepted: NO_DEFAULT_PATH disables the
  # usual system-wide search locations.
  find_package(libhipcxx
    REQUIRED
    CONFIG
    PATHS "${LIBHIPCXX_DIR}/lib/cmake/libhipcxx"
    NO_DEFAULT_PATH)

  # hipCollections is consumed through its include directory only.
  include_directories(${HIP_COLLECTIONS_DIR})
endif()

set(CSRC pyg_lib/csrc)
file(GLOB_RECURSE ALL_SOURCES ${CSRC}/*.cpp)
if (WITH_CUDA)
file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.cu)
endif()
# ROCm backend: add the HIP sources and adjust flags for the CK matmul file.
if(WITH_ROCM)
  # Extend the source list with every *.hip file found below ${CSRC}.
  file(GLOB_RECURSE ALL_SOURCES ${ALL_SOURCES} ${CSRC}/*.hip)

  # The CK half-precision math utilities need the implicit half
  # conversions/operators, while PyTorch's ROCm compile flags may define
  # __HIP_NO_HALF_* globally. Undefine those macros for the CK matmul
  # translation unit only, and only when that file is present.
  set(PYG_ROCM_MATMUL_HIP "${CMAKE_CURRENT_SOURCE_DIR}/pyg_lib/csrc/ops/rocm/matmul_kernel.hip")
  if(EXISTS "${PYG_ROCM_MATMUL_HIP}")
    set_property(SOURCE "${PYG_ROCM_MATMUL_HIP}" PROPERTY
      COMPILE_OPTIONS
        -U__HIP_NO_HALF_OPERATORS__
        -U__HIP_NO_HALF_CONVERSIONS__)
  endif()
endif()
add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES})
target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
if(MKL_INCLUDE_FOUND)
Expand Down Expand Up @@ -110,6 +157,16 @@ if (NOT MSVC)
include_directories(${GKLIB_PATH})
include_directories("${METIS_DIR}/include")
add_subdirectory("${METIS_DIR}/libmetis")

# ROCm builds: compile the bundled METIS-related targets as
# position-independent code. Each target is touched only if it exists.
if(WITH_ROCM)
  foreach(_pyg_pic_target IN ITEMS metis GKlib)
    if(TARGET ${_pyg_pic_target})
      set_property(TARGET ${_pyg_pic_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
    endif()
  endforeach()
endif()

target_link_libraries(${PROJECT_NAME} PRIVATE metis)
endif()

Expand All @@ -129,6 +186,25 @@ if(WITH_CUDA)
third_party/cccl/libcudacxx/include)
endif()

# ROCm backend: link the threading library and the ROCm imported targets.
if(WITH_ROCM)
  target_link_libraries(${PROJECT_NAME}
    PRIVATE
      Threads::Threads
      hip::device
      roc::hipblaslt
      roc::rocblas
      roc::rocthrust
      libhipcxx::libhipcxx
  )

  # The CK GEMM operations library may be exported under either of two
  # namespaces; take the first one that exists.
  set(_pyg_ck_gemm_target "")
  foreach(_pyg_ck_candidate IN ITEMS
      composable_kernel::device_gemm_operations
      composablekernels::device_gemm_operations)
    if(TARGET ${_pyg_ck_candidate})
      set(_pyg_ck_gemm_target "${_pyg_ck_candidate}")
      break()
    endif()
  endforeach()

  if("${_pyg_ck_gemm_target}" STREQUAL "")
    # Neither target is available: continue without linking a CK library.
    message(WARNING "composable_kernel::device_gemm_operations target not found; building with CK headers only")
  else()
    target_link_libraries(${PROJECT_NAME} PRIVATE ${_pyg_ck_gemm_target})
  endif()
endif()

set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")

if(BUILD_TEST)
Expand Down
89 changes: 88 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

We provide pre-built Python wheels for all major OS/PyTorch/CUDA combinations from Python 3.10 till 3.13, see [here](https://data.pyg.org/whl).

To install the wheels, simply run
To install the wheels for the CPU/CUDA backends, simply run

```
pip install pyg-lib -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
Expand Down Expand Up @@ -53,6 +53,93 @@ The following combinations are supported:
| **Windows** | ✅ | ✅ | ✅ | ✅ | |
| **macOS** | ✅ | | | | |

For the ROCm backend, the external [`pyg-rocm-build` repository](https://github.com/Looong01/pyg-rocm-build) provides wheels and detailed instructions on how to install PyG for ROCm.
If you have any questions about it, please open an issue [here](https://github.com/Looong01/pyg-rocm-build/issues).

**Note:** The ROCm backend currently supports Linux only.

### Build from source on a ROCm machine (Linux)

The following steps build and install `pyg-lib` with ROCm/HIP support from source.
Ensure your ROCm installation includes `hipblaslt`, `rocblas`, `rocprim`,
`rocthrust`, and `composable_kernel`.

1. Install system build tools:

```bash
sudo apt update
sudo apt install -y build-essential python3-dev python3-pip cmake ninja-build
```

2. Install Python build dependencies:

```bash
python3 -m pip install --upgrade pip setuptools wheel ninja
```

3. Install a ROCm-enabled PyTorch build (matching your ROCm stack):

```bash
# Example:
# python3 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm6.3
```

4. Configure environment variables:

```bash
export ROCM_PATH=/opt/rocm
export CMAKE_PREFIX_PATH="${ROCM_PATH};${ROCM_PATH}/lib/cmake"
export FORCE_ROCM=1
export FORCE_CUDA=0

# Set your GPU architecture, for example gfx90a/gfx942/gfx1100:
export PYTORCH_ROCM_ARCH="gfx1100;gfx950;gfx942;gfx90a;gfx908;gfx1201;gfx1101;gfx1030"
# Alternatively, you can use:
# export AMDGPU_TARGETS="gfx1100;gfx950;gfx942;gfx90a;gfx908;gfx1201;gfx1101;gfx1030"
# If your hipcc does not recognize one of the targets, remove that target.

# Optional: disable CK grouped matmul path (enabled by default).
# export PYG_ROCM_MATMUL_USE_CK=0
# Optional: require CK path (fail fast if fallback would happen).
# export PYG_ROCM_MATMUL_REQUIRE_CK=1
```

`grouped_matmul` / `segment_matmul` behavior on ROCm:

- **Important:** The CK backend in `pyg-lib` only provides native kernels for
`bf16` and `fp16`.
- `fp16` input: use CK FP16 grouped GEMM path.
- `bf16` input: use CK BF16 grouped GEMM path.
- `fp32` input: CK does not run native FP32 kernels. `pyg-lib` first converts
to `bf16` and tries CK BF16, then converts to `fp16` and tries CK FP16.
- Since `fp32` uses reduced-precision conversion on the CK path, numerical
differences at `bf16/fp16` precision are expected.
- `PYG_ROCM_MATMUL_USE_CK=0`: disable CK grouped matmul and use ATen matmul.
- `PYG_ROCM_MATMUL_REQUIRE_CK=1`: strict mode. If no CK path is accepted, an
error is raised instead of falling back.
- Without strict mode, unsupported CK shapes/targets fall back to `at::mm_out`
with a warning that includes the reason.
- On architectures without CK XDL support for the selected path (for example
some `gfx10` targets), fallback warnings are expected.

5. Build and install:

```bash
python3 -m pip install -v .
```

For editable/development install:

```bash
python3 -m pip install -v -e .
```

Optional check:

```bash
python3 -c "import torch; print(torch.version.hip)"
```

### From nightly

Nightly wheels are provided for Linux from Python 3.10 till 3.13:
Expand Down
5 changes: 5 additions & 0 deletions benchmark/sampler/neighbor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
default=['pyg-lib', 'torch-sparse', 'dgl'])
args = argparser.parse_args()

# Fail fast on an unsupported combination: benchmarking pyg-lib requires
# the --directed flag.
if 'pyg-lib' in args.libraries and not args.directed:
    raise ValueError(
        "pyg-lib neighbor sampling does not support directed=False. "
        "Run with --directed, or remove pyg-lib from --libraries.")


@withSeed
@withDataset('DIMACS10', 'citationCiteseer')
Expand Down
Loading
Loading