Skip to content

Commit 544875e

Browse files
authored
add test cuda workflow (#2848)
Signed-off-by: Jinzhe Zeng <[email protected]>
1 parent 80b2195 commit 544875e

File tree

7 files changed

+117
-5
lines changed

7 files changed

+117
-5
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
on:
2+
pull_request_target:
3+
types:
4+
- "labeled"
5+
name: Test CUDA
6+
jobs:
7+
remove_label:
8+
permissions:
9+
contents: read
10+
pull-requests: write
11+
# so one can re-trigger the workflow without manually removing the label
12+
runs-on: ubuntu-latest
13+
if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA'
14+
steps:
15+
- uses: actions-ecosystem/action-remove-labels@v1
16+
with:
17+
labels: Test CUDA
18+
number: ${{ github.event.pull_request.number }}

.github/workflows/test_cuda.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
on:
2+
# manually trigger
3+
workflow_dispatch:
4+
pull_request:
5+
types:
6+
- "labeled"
7+
name: Test CUDA
8+
jobs:
9+
test_cuda:
10+
name: Test Python and C++ on CUDA
11+
runs-on: nvidia
12+
# Run only in the upstream repository, either when the "Test CUDA" label is applied or on a manual dispatch.
# The parentheses matter: && binds tighter than ||, so without them a manual dispatch would skip the owner check.
if: github.repository_owner == 'deepmodeling' && (github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch')
13+
steps:
14+
- uses: actions/checkout@v4
15+
- uses: actions/setup-python@v4
16+
with:
17+
python-version: '3.11'
18+
cache: 'pip'
19+
- name: Setup MPI
20+
uses: mpi4py/setup-mpi@v1
21+
with:
22+
mpi: mpich
23+
- uses: lukka/get-cmake@latest
24+
- run: |
25+
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
26+
&& sudo dpkg -i cuda-keyring_1.0-1_all.deb \
27+
&& sudo apt-get update \
28+
&& sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8
29+
- run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
30+
- run: pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip"
31+
env:
32+
DP_BUILD_TESTING: 1
33+
DP_VARIANT: cuda
34+
CUDA_PATH: /usr/local/cuda-11.8
35+
- run: dp --version
36+
- run: pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0
37+
- run: source/install/test_cc_local.sh
38+
env:
39+
OMP_NUM_THREADS: 1
40+
TF_INTRA_OP_PARALLELISM_THREADS: 1
41+
TF_INTER_OP_PARALLELISM_THREADS: 1
42+
LMP_CXX11_ABI_0: 1
43+
CMAKE_GENERATOR: Ninja
44+
DP_VARIANT: cuda
45+
DP_USE_MPICH2: 1
46+
CUDA_PATH: /usr/local/cuda-11.8
47+
- run: |
48+
export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
49+
export PATH=${{ github.workspace }}/dp_test/bin:$PATH
50+
pytest -s --cov=deepmd source/lmp/tests
51+
pytest -s --cov=deepmd source/ipi/tests
52+
env:
53+
OMP_NUM_THREADS: 1
54+
TF_INTRA_OP_PARALLELISM_THREADS: 1
55+
TF_INTER_OP_PARALLELISM_THREADS: 1
56+
LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
57+
CUDA_PATH: /usr/local/cuda-11.8
58+
- uses: codecov/codecov-action@v3
59+
with:
60+
gcov: true

doc/development/cicd.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# CI/CD
2+
3+
<!-- TODO: To be written... -->
4+
5+
## CI
6+
7+
<!-- TODO: To be written... -->
8+
9+
### Test CUDA
10+
11+
The `Test CUDA` action runs tests on a self-hosted runner equipped with an NVIDIA GPU. It is not triggered automatically for every PR. Instead, a developer who has permission to manage labels can apply the `Test CUDA` label to a PR to trigger this action.
12+
13+
<!-- ## CD -->
14+
15+
<!-- TODO: To be written... -->

doc/index.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
5252
.. toctree::
5353
:maxdepth: 2
5454
:caption: Tutorial
55-
:glob:
5655

5756
Tutorials <https://tutorials.deepmodeling.com/>
5857
Publications <https://deepmodeling.com/blog/papers/deepmd-kit/>
@@ -62,9 +61,12 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
6261
.. toctree::
6362
:maxdepth: 5
6463
:caption: Developer Guide
65-
:glob:
6664

67-
development/*
65+
development/cmake
66+
development/create-a-model
67+
development/type-embedding
68+
development/coding-conventions
69+
development/cicd
6870
api_py/api_py
6971
api_op
7072
API_CC/api_cc

source/install/test_cc.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
set -e
22

3+
# Map DP_VARIANT to the matching CMake toolkit switch. For any other value
# (or when DP_VARIANT is unset) CUDA_ARGS is not assigned here, so the unquoted
# ${CUDA_ARGS} passed to cmake further down adds no argument unless the
# variable was already set in the calling environment.
if [ "$DP_VARIANT" = "cuda" ]; then
4+
CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE"
5+
elif [ "$DP_VARIANT" = "rocm" ]; then
6+
# The variable keeps the CUDA_ARGS name even though it carries the ROCm switch.
CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE"
7+
fi
8+
39
#------------------
410

511
SCRIPT_PATH=$(dirname $(realpath -s $0))
@@ -11,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
1117
BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
1218
mkdir -p ${BUILD_TMP_DIR}
1319
cd ${BUILD_TMP_DIR}
14-
cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ..
20+
cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ${CUDA_ARGS} ..
1521
cmake --build . -j${NPROC}
1622
cmake --install .
1723
ctest --output-on-failure

source/install/test_cc_local.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
set -e
22

3+
# Map DP_VARIANT to the matching CMake toolkit switch. For any other value
# (or when DP_VARIANT is unset) CUDA_ARGS is not assigned here, so the unquoted
# ${CUDA_ARGS} passed to cmake further down adds no argument unless the
# variable was already set in the calling environment.
if [ "$DP_VARIANT" = "cuda" ]; then
4+
CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE"
5+
elif [ "$DP_VARIANT" = "rocm" ]; then
6+
# The variable keeps the CUDA_ARGS name even though it carries the ROCm switch.
CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE"
7+
fi
8+
39
#------------------
410

511
SCRIPT_PATH=$(dirname $(realpath -s $0))
@@ -12,7 +18,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
1218
BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
1319
mkdir -p ${BUILD_TMP_DIR}
1420
cd ${BUILD_TMP_DIR}
15-
cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ..
21+
cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ${CUDA_ARGS} ..
1622
cmake --build . -j${NPROC}
1723
cmake --install .
1824
ctest --output-on-failure

source/lmp/plugin/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
1919

2020
target_include_directories(lammps_interface INTERFACE ${LAMMPS_HEADER_DIR})
2121

22+
# Opt-in switch: when the environment variable DP_USE_MPICH2 is "1" at
# configure time, set MPI_EXECUTABLE_SUFFIX before find_package(MPI) runs.
if("$ENV{DP_USE_MPICH2}" STREQUAL "1")
23+
# FindMPI documents MPI_EXECUTABLE_SUFFIX as a suffix appended to the tool
# names it searches for; ".mpich" selects the MPICH-suffixed tools.
# See https://stackoverflow.com/a/47976518/9567349
24+
set(MPI_EXECUTABLE_SUFFIX ".mpich")
25+
endif()
26+
2227
find_package(MPI)
2328
if(MPI_FOUND)
2429
set(LAMMPS_MPI_INCLUDE_DIRS ${MPI_CXX_INCLUDE_DIRS})

0 commit comments

Comments
 (0)