Skip to content

Commit 2a605a2

Browse files
authored
Adding GPU target capability and creating parent test workflow (ROCm#193)
Organizing the GH Actions setup to enable multiple tests to run in parallel. rocPRIM and hipBLASLt will be added shortly after this PR goes through. This PR will default all GH Actions builds to gfx942, as we only have MI300s so far and certain tests require specific matching GFX targets. Closes ROCm#225
1 parent 02187df commit 2a605a2

File tree

6 files changed

+128
-115
lines changed

6 files changed

+128
-115
lines changed

.github/workflows/build_linux_packages.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,18 @@ on:
66
package_version:
77
type: string
88
default: ADHOCBUILD
9+
amdgpu_families:
10+
type: string
11+
default: gfx94X-dcgpu
912

1013
workflow_call:
1114
inputs:
1215
package_version:
1316
type: string
1417
default: ADHOCBUILD
18+
amdgpu_families:
19+
type: string
20+
default: gfx94X-dcgpu
1521

1622
jobs:
1723
build_linux_packages:
@@ -64,12 +70,13 @@ jobs:
6470
run: |
6571
# Generate a new build id.
6672
package_version="${{ inputs.package_version }}"
73+
amdgpu_families="${{ inputs.amdgpu_families }}"
6774
echo "Building package ${package_version}"
6875
6976
# Build.
7077
cmake -B build -GNinja . \
7178
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
72-
-DTHEROCK_AMDGPU_FAMILIES=gfx110X-dgpu \
79+
-DTHEROCK_AMDGPU_FAMILIES=${amdgpu_families} \
7380
-DTHEROCK_PACKAGE_VERSION="${package_version}" \
7481
-DTHEROCK_VERBOSE=ON
7582
./build_tools/watch_top_processes.sh &

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
- main
88
workflow_dispatch:
99
inputs:
10+
amdgpu_families:
11+
type: string
12+
default: gfx94X-dcgpu
1013

1114
permissions:
1215
contents: read
@@ -25,6 +28,8 @@ jobs:
2528
id-token: write
2629
name: Build Linux Packages
2730
uses: ./.github/workflows/build_linux_packages.yml
31+
with:
32+
amdgpu_families: "${{ inputs.amdgpu_families != '' && inputs.amdgpu_families || 'gfx94X-dcgpu' }}"
2833

2934
build_windows_packages:
3035
name: Build Windows Packages
@@ -34,6 +39,8 @@ jobs:
3439
needs: build_linux_packages
3540
name: Test Linux Packages
3641
uses: ./.github/workflows/test_linux_packages.yml
42+
with:
43+
amdgpu_families: "${{ inputs.amdgpu_families != '' && inputs.amdgpu_families || 'gfx94X-dcgpu' }}"
3744

3845
# build_python_packages:
3946
# name: Build Python Packages
Lines changed: 13 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,3 @@
1-
# Copyright 2025 Advanced Micro Devices, Inc.
2-
#
3-
# Licensed under the Apache License v2.0 with LLVM Exceptions.
4-
# See https://llvm.org/LICENSE.txt for license information.
5-
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6-
71
name: Test Linux Packages
82

93
on:
@@ -12,75 +6,21 @@ on:
126
artifact_run_id:
137
type: string
148
default: ""
9+
amdgpu_families:
10+
type: string
11+
default: gfx94X-dcgpu
12+
workflow_call:
13+
inputs:
14+
amdgpu_families:
15+
type: string
16+
default: gfx94X-dcgpu
1517
push:
1618
branches:
1719
- ADHOCBUILD
18-
workflow_call:
1920

2021
jobs:
21-
test_rocm_info:
22-
name: "Sanity ROCM Test"
23-
runs-on: linux-mi300-1gpu-ossci-rocm
24-
strategy:
25-
fail-fast: true
26-
defaults:
27-
run:
28-
shell: bash
29-
env:
30-
VENV_DIR: ${{ github.workspace }}/.venv
31-
ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
32-
33-
# MOSTLY BOILER PLATE ABOVE.
34-
steps:
35-
- name: Checkout Repository
36-
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
37-
38-
- name: Install the AWS tool
39-
run: ./dockerfiles/cpubuilder/install_awscli.sh
40-
41-
- name: "Setting up Python"
42-
id: setup_python
43-
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
44-
with:
45-
python-version: 3.11
46-
47-
- name: Create Python venv
48-
run: |
49-
python -m venv ${VENV_DIR}
50-
source ${VENV_DIR}/bin/activate
51-
pip install -r requirements-test.txt
52-
53-
- name: Download and Unpack Artifacts
54-
run: |
55-
source ${VENV_DIR}/bin/activate
56-
57-
BUILD_ARTIFACTS_DIR="build/artifacts"
58-
# TODO(sosa): We should infer this from the runner. For Libs and Above it should be based on the
59-
# runner, everything else is generic. Use just generic since this is all core/below.
60-
VARIANT="_generic"
61-
echo "Making ${BUILD_ARTIFACTS_DIR} directory"
62-
mkdir -p "${BUILD_ARTIFACTS_DIR}"
63-
64-
echo "Downloading artifacts"
65-
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/core-runtime_run"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
66-
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/core-runtime_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
67-
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/sysdeps_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
68-
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/base_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
69-
70-
echo "Unpacking artifacts"
71-
pushd "${BUILD_ARTIFACTS_DIR}"
72-
mkdir output_dir
73-
python ${{ github.workspace }}/build_tools/fileset_tool.py artifact-flatten *.tar.xz -o output_dir --verbose
74-
sudo rsync --archive --keep-dirlinks --verbose output_dir/* /
75-
popd
76-
77-
- name: Run rocminfo
78-
run: |
79-
echo "Running rocminfo"
80-
/bin/rocminfo
81-
82-
- name: Run ROCm Sanity Tests
83-
run: |
84-
source ${VENV_DIR}/bin/activate
85-
pytest tests/ \
86-
--log-cli-level=info
22+
test_sanity_check:
23+
name: 'Test Sanity Check'
24+
uses: './.github/workflows/test_sanity_check.yml'
25+
with:
26+
artifact_run_id: ${{ inputs.artifact_run_id }}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
name: TheRock Sanity Check
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
artifact_run_id:
7+
type: string
8+
default: ""
9+
workflow_call:
10+
inputs:
11+
artifact_run_id:
12+
type: string
13+
default: ""
14+
push:
15+
branches:
16+
- ADHOCBUILD
17+
18+
jobs:
19+
test_sanity_check:
20+
name: "Sanity ROCM Test"
21+
runs-on: linux-mi300-1gpu-ossci-rocm
22+
strategy:
23+
fail-fast: false
24+
defaults:
25+
run:
26+
shell: bash
27+
env:
28+
VENV_DIR: ${{ github.workspace }}/.venv
29+
ARTIFACT_RUN_ID: "${{ inputs.artifact_run_id != '' && inputs.artifact_run_id || github.run_id }}"
30+
BUILD_ARTIFACTS_DIR: "${{ github.workspace }}/build/artifacts"
31+
THEROCK_BIN_DIR: "${{ github.workspace }}/build/artifacts/output_dir/bin"
32+
33+
steps:
34+
- name: Checkout Repository
35+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
36+
37+
- name: Install the AWS tool
38+
run: ./dockerfiles/cpubuilder/install_awscli.sh
39+
40+
41+
- name: "Setting up Python"
42+
id: setup_python
43+
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
44+
with:
45+
python-version: 3.11
46+
47+
- name: Create Python venv
48+
run: |
49+
python -m venv ${VENV_DIR}
50+
source ${VENV_DIR}/bin/activate
51+
pip install -r requirements-test.txt
52+
53+
- name: Download and Unpack Artifacts
54+
run: |
55+
source ${VENV_DIR}/bin/activate
56+
57+
# TODO(sosa): We should infer this from the runner. For Libs and Above it should be based on the
58+
# runner, everything else is generic. Use just generic since this is all core/below.
59+
VARIANT="_generic"
60+
echo "Making ${BUILD_ARTIFACTS_DIR} directory"
61+
mkdir -p "${BUILD_ARTIFACTS_DIR}"
62+
63+
echo "Downloading artifacts"
64+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/core-runtime_run"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
65+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/core-runtime_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
66+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/sysdeps_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
67+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/base_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
68+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/amd-llvm_run"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
69+
aws s3 cp s3://therock-artifacts/${ARTIFACT_RUN_ID}/amd-llvm_lib"${VARIANT}".tar.xz "${BUILD_ARTIFACTS_DIR}" --no-sign-request
70+
71+
echo "Unpacking artifacts"
72+
mkdir ${BUILD_ARTIFACTS_DIR}/output_dir
73+
python ${{ github.workspace }}/build_tools/fileset_tool.py artifact-flatten ${BUILD_ARTIFACTS_DIR}/*.tar.xz -o ${BUILD_ARTIFACTS_DIR}/output_dir --verbose
74+
75+
- name: Run rocminfo
76+
run: |
77+
${THEROCK_BIN_DIR}/rocminfo
78+
79+
- name: Run ROCm Sanity Tests
80+
run: |
81+
source ${VENV_DIR}/bin/activate
82+
pytest tests/ \
83+
--log-cli-level=info
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ int main() {
1313
hipMalloc(&d_buf, size * sizeof(int));
1414
hipLaunchKernelGGL(squares, gridsize, blocksize, 0, 0, d_buf);
1515
hipDeviceSynchronize();
16+
return 0;
1617
}

tests/test_rocm_sanity.py

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,24 @@
44
from pathlib import Path
55
from pytest_check import check
66
import logging
7+
import os
78

89
THIS_DIR = Path(__file__).resolve().parent
910

1011
logger = logging.getLogger(__name__)
1112

13+
THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
1214

13-
def run_command(command):
14-
process = subprocess.run(command, capture_output=True)
15-
return str(process.stdout)
1615

17-
18-
@pytest.fixture(scope="session")
19-
def rocm_info_output():
20-
try:
21-
return run_command(["rocminfo"])
22-
except Exception as e:
23-
logger.info(str(e))
24-
return None
16+
def run_command(command, cwd=None):
17+
process = subprocess.run(command, capture_output=True, cwd=cwd)
18+
return process
2519

2620

2721
@pytest.fixture(scope="session")
28-
def clinfo_info_output():
22+
def rocm_info_output():
2923
try:
30-
return run_command(["clinfo"])
24+
return str(run_command([f"{THEROCK_BIN_DIR}/rocminfo"]).stdout)
3125
except Exception as e:
3226
logger.info(str(e))
3327
return None
@@ -55,38 +49,19 @@ def test_rocm_output(self, rocm_info_output, to_search):
5549
f"Failed to search for {to_search} in rocminfo output",
5650
)
5751

58-
@pytest.mark.parametrize(
59-
"to_search",
60-
[
61-
(r"Device(\s|\\t)*Type:(\s|\\t)*CL_DEVICE_TYPE_GPU"),
62-
(r"Name:(\s|\\t)*gfx"),
63-
(r"Vendor:(\s|\\t)*Advanced Micro Devices, Inc."),
64-
],
65-
ids=[
66-
"clinfo - GPU Device Type Search",
67-
"clinfo - GFX Name Search",
68-
"clinfo - AMD Vendor Name Search",
69-
],
70-
)
71-
def test_clinfo_output(self, clinfo_info_output, to_search):
72-
if not clinfo_info_output:
73-
pytest.fail("Command clinfo failed to run")
74-
check.is_not_none(
75-
re.search(to_search, clinfo_info_output),
76-
f"Failed to search for {to_search} in clinfo output",
77-
)
78-
7952
def test_hip_printf(self):
8053
# Compiling .cpp file using hipcc
8154
run_command(
8255
[
83-
"hipcc",
84-
str(THIS_DIR / "hip_printf.cpp"),
56+
"./hipcc",
57+
str(THIS_DIR / "hipcc_check.cpp"),
8558
"-o",
86-
str(THIS_DIR / "hip_printf"),
87-
]
59+
str(THIS_DIR / "hipcc_check"),
60+
],
61+
cwd=str(THEROCK_BIN_DIR),
8862
)
8963

90-
# Running the executable
91-
output = run_command([str(THIS_DIR / "hip_printf")])
92-
check.is_not_none(re.search(r"Thread.*is\swriting", output))
64+
# Running and checking the executable
65+
process = run_command(["./hipcc_check"], cwd=str(THIS_DIR))
66+
check.equal(process.returncode, 0)
67+
check.greater(os.path.getsize(str(THIS_DIR / "hipcc_check")), 0)

0 commit comments

Comments
 (0)