diff --git a/test/therock/hipdnn_install_tests/CMakeLists.txt b/test/therock/hipdnn_install_tests/CMakeLists.txt new file mode 100644 index 000000000000..122c851c3f9c --- /dev/null +++ b/test/therock/hipdnn_install_tests/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +cmake_minimum_required(VERSION 3.25) +project(hipdnn_install_tests LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 17) + +enable_testing() + +function(add_hipdnn_install_test) # usage: add_hipdnn_install_test(PACKAGE <pkg> HEADER <header>) + cmake_parse_arguments(ARG "" "PACKAGE;HEADER" "" ${ARGN}) + + find_package(${ARG_PACKAGE} CONFIG REQUIRED) # config-mode lookup; configuration stops if the package is not found + message(STATUS "${ARG_PACKAGE} found") + + set(test_name "test_${ARG_PACKAGE}") + set(header "${ARG_HEADER}") # substituted for @header@ in test_template.cpp.in + set(pkg_name "${ARG_PACKAGE}") # substituted for @pkg_name@ in test_template.cpp.in + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/test_template.cpp.in" + "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.cpp" + @ONLY + ) + add_executable(${test_name} "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.cpp") + target_link_libraries(${test_name} PRIVATE ${ARG_PACKAGE}) # assumes each package exports a target named after itself - TODO confirm + add_test(NAME ${test_name} COMMAND ${test_name}) +endfunction() + +add_hipdnn_install_test(PACKAGE hipdnn_backend HEADER hipdnn_backend.h) +add_hipdnn_install_test(PACKAGE hipdnn_data_sdk HEADER hipdnn_data_sdk/data_objects/graph_generated.h) +add_hipdnn_install_test(PACKAGE hipdnn_frontend HEADER hipdnn_frontend.hpp) +add_hipdnn_install_test(PACKAGE hipdnn_plugin_sdk HEADER hipdnn_plugin_sdk/PluginApi.h) +add_hipdnn_install_test(PACKAGE hipdnn_test_sdk HEADER hipdnn_test_sdk/utilities/Seeds.hpp) diff --git a/test/therock/hipdnn_install_tests/test_template.cpp.in b/test/therock/hipdnn_install_tests/test_template.cpp.in new file mode 100644 index 000000000000..009806a63082 --- /dev/null +++ b/test/therock/hipdnn_install_tests/test_template.cpp.in @@ -0,0 +1,10 @@ +// Copyright (c) Advanced Micro Devices, Inc. 
+// SPDX-License-Identifier: MIT + +#include <@header@> +#include <iostream> + +int main() { + std::cout << "@pkg_name@: package found and linked successfully" << std::endl; + return 0; +} diff --git a/test/therock/test_hipblas.py b/test/therock/test_hipblas.py new file mode 100644 index 000000000000..3359569a147a --- /dev/null +++ b/test/therock/test_hipblas.py @@ -0,0 +1,60 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +import sys +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +# Importing is_asan from github_actions_api.py +sys.path.append(str(THEROCK_DIR / "build_tools" / "github_actions")) +from github_actions_api import is_asan + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +if is_asan(): + environ_vars["HSA_XNACK"] = "1" + +logging.basicConfig(level=logging.INFO) + +tests_to_exclude = [ + "*known_bug*", + "_/getrs*", + "_/getri_batched.solver*", + "_/gels_batched.solver*", +] + +exclusion_list = ":".join(tests_to_exclude) + +cmd = [ + f"{THEROCK_BIN_DIR}/hipblas-test", +] + +# If quick tests are enabled, we run quick tests only. 
+# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + cmd += [ + "--yaml", + f"{THEROCK_BIN_DIR}/hipblas_smoke.yaml", + f"--gtest_filter=-{exclusion_list}", + ] +else: + # TODO(#2616): Enable full tests once known machine issues are resolved + cmd += [f"--gtest_filter=*pre_checkin*-{exclusion_list}"] + + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_hipblaslt.py b/test/therock/test_hipblaslt.py new file mode 100644 index 000000000000..7ba6406409e6 --- /dev/null +++ b/test/therock/test_hipblaslt.py @@ -0,0 +1,52 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +import sys +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") +platform = os.getenv("RUNNER_OS", "").lower() +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +# Importing is_asan from github_actions_api.py +sys.path.append(str(THEROCK_DIR / "build_tools" / "github_actions")) +from github_actions_api import is_asan + +logging.basicConfig(level=logging.INFO) + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +if is_asan(): + environ_vars["HSA_XNACK"] = "1" + environ_vars["OMP_NUM_THREADS"] = "1" + +# If quick tests are enabled, we run quick tests only. 
+# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") + +# Only run quick tests (less memory intensive) for Windows strix-halo, issue: https://github.com/ROCm/TheRock/issues/1750 +if AMDGPU_FAMILIES == "gfx1151" and platform == "windows": + test_type = "quick" + +test_filter = [] +if test_type == "quick": + test_filter.append("--gtest_filter=*smoke*") +elif test_type == "full": + test_filter.append("--gtest_filter=*quick*") + +cmd = [f"{THEROCK_BIN_DIR}/hipblaslt-test"] + test_filter + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_hipcub.py b/test/therock/test_hipcub.py new file mode 100644 index 000000000000..58c09fd21ab5 --- /dev/null +++ b/test/therock/test_hipcub.py @@ -0,0 +1,118 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +import platform +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +QUICK_TESTS = [ + "*ShuffleTests/*.*", + "*WarpStoreTest/*.*", + "AdjacentDifference/*.*", + "AdjacentDifferenceSubtract/*.*", + "BatchCopyTests/*.*", + "BatchMemcpyTests/*.*", + "BlockScan*", + "DeviceScanTests/*.*", + "Discontinuity/*.*", + "DivisionOperatorTests/*.*", + "ExchangeTests", + "GridTests/*.*", + "HistogramEven/*.*", + "HistogramInputArrayTests/*.*", + "HistogramRange/*.*", + "IteratorTests/*.*", + "LoadStoreTestsDirect/*.*", + "LoadStoreTestsStriped/*.*", + "LoadStoreTestsTranspose/*.*", + "LoadStoreTestsVectorize/*.*", + "MergeSort/*.*", + "NCThreadOperatorsTests/*", + "RadixRank/*.*", + "RadixSort/*.*", + "ReduceArgMinMaxSpecialTests/*.*", + "ReduceInputArrayTests/*.*", + "ReduceLargeIndicesTests/*.*", + "ReduceSingleValueTests/*.*", + "ReduceTests/*.*", + 
"RunLengthDecodeTest/*.*", + "RunLengthEncode/*.*", + "SegmentedReduce/*.*", + "SegmentedReduceArgMinMaxSpecialTests/*.*", + "SegmentedReduceOp/*.*", + "SelectTests/*.*", + "ThreadOperationTests/*.*", + "ThreadOperatorsTests/*.*", + "UtilPtxTests/*.*", + "WarpExchangeTest/*.*", + "WarpLoadTest/*.*", + "WarpMergeSort/*.*", + "WarpReduceTests/*.*", + "WarpScanTests*", +] + +# Generate the resource spec file for ctest; the generator runs with the ROCm bin and lib directories prepended to PATH and LD_LIBRARY_PATH +rocm_base = Path(THEROCK_BIN_DIR).resolve().parent +ld_paths = [ + rocm_base / "lib", +] +ld_paths_str = os.pathsep.join(str(p) for p in ld_paths) +existing_path = os.environ.get("PATH", "") +existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "") +env_vars = os.environ.copy() +env_vars["PATH"] = ( + f"{THEROCK_BIN_DIR}{os.pathsep}{existing_path}" + if existing_path + else THEROCK_BIN_DIR +) +env_vars["ROCM_PATH"] = str(rocm_base) +env_vars["LD_LIBRARY_PATH"] = ( + f"{ld_paths_str}{os.pathsep}{existing_ld_path}" + if existing_ld_path + else ld_paths_str +) + +is_windows = platform.system() == "Windows" +exe_name = "generate_resource_spec.exe" if is_windows else "generate_resource_spec" +exe_dir = rocm_base / "bin" / "hipcub" +resource_spec_file = "resources.json" + +res_gen_cmd = [ + str(exe_dir / exe_name), + str(exe_dir / resource_spec_file), +] +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(res_gen_cmd)}") +subprocess.run(res_gen_cmd, cwd=THEROCK_DIR, check=True, env=env_vars) + +# Run ctest with resource spec file +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/hipcub", + "--output-on-failure", + "--parallel", + "8", + "--resource-spec-file", + resource_spec_file, # NOTE(review): bare file name, while the generator wrote the file under exe_dir; confirm ctest resolves it as intended + "--timeout", + "300", +] + +# If quick tests are enabled, we run quick tests only. 
+# Otherwise, we run the normal test suite +environ_vars = os.environ.copy() # NOTE(review): fresh copy of os.environ; the PATH, ROCM_PATH and LD_LIBRARY_PATH overrides stored in env_vars earlier in this script are not passed to ctest - confirm this is intended +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + environ_vars["GTEST_FILTER"] = ":".join(QUICK_TESTS) + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_hipdnn.py b/test/therock/test_hipdnn.py new file mode 100644 index 000000000000..432efb73f22b --- /dev/null +++ b/test/therock/test_hipdnn.py @@ -0,0 +1,34 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") # not referenced anywhere else in this script + +logging.basicConfig(level=logging.INFO) + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/hipdnn", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "60", +] + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, +) diff --git a/test/therock/test_hipdnn_install.py b/test/therock/test_hipdnn_install.py new file mode 100644 index 000000000000..d1d48d0e3228 --- /dev/null +++ b/test/therock/test_hipdnn_install.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# Copyright (c) Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +""" +hipDNN installation consumption test. + +This test verifies that hipDNN packages built by TheRock can be properly +consumed by external projects using CMake's find_package. It tests the +CMake packaging/installation correctness, not hipDNN functionality. 
+""" + +import argparse +import logging +import os +import platform +import shlex +import subprocess +import tempfile +from pathlib import Path + +OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent +TEST_PROJECT_DIR = SCRIPT_DIR / "hipdnn_install_tests" + +logging.basicConfig(level=logging.INFO) + + +def run_tests(build_dir: Path) -> None: + """Configure, build, and run the hipDNN consumer test project inside build_dir; any failing step raises subprocess.CalledProcessError.""" + # Locally, can set OUTPUT_ARTIFACTS_DIR=build/dist/rocm for testing + artifacts_path = Path(OUTPUT_ARTIFACTS_DIR).resolve() + is_windows = platform.system() == "Windows" + + # Compiler extension differs by platform + compiler_ext = ".exe" if is_windows else "" + + # Set up environment variables for CMake/HIP + environ_vars = os.environ.copy() + environ_vars["HIP_PLATFORM"] = "amd" + + if is_windows: + # Set library path for runtime (needed when running the test executables) + rocm_lib = str(artifacts_path) # NOTE(review): this is the artifacts root itself, unlike the "lib" subdirectory used on Linux; confirm the DLLs are not under a "bin" subdirectory + + # Windows uses PATH for DLL lookup + path_sep = ";" + if "PATH" in environ_vars: + environ_vars["PATH"] = f"{rocm_lib}{path_sep}{environ_vars['PATH']}" + else: + environ_vars["PATH"] = rocm_lib + else: + rocm_lib = str(artifacts_path / "lib") + + # Linux uses LD_LIBRARY_PATH + path_sep = ":" + if "LD_LIBRARY_PATH" in environ_vars: + environ_vars["LD_LIBRARY_PATH"] = ( + f"{rocm_lib}{path_sep}{environ_vars['LD_LIBRARY_PATH']}" + ) + else: + environ_vars["LD_LIBRARY_PATH"] = rocm_lib + + # We configure and build test projects externally (not during TheRock build) + # to emulate how a consumer would build against the installed hipDNN artifacts. + # This catches packaging issues that only manifest during external consumption. 
+ configure_cmd = [ + "cmake", + "-B", + str(build_dir), + "-S", + str(TEST_PROJECT_DIR), + "-GNinja", + f"-DCMAKE_PREFIX_PATH={artifacts_path}", + f"-DCMAKE_CXX_COMPILER={artifacts_path}/lib/llvm/bin/clang++{compiler_ext}", + f"-DCMAKE_C_COMPILER={artifacts_path}/lib/llvm/bin/clang{compiler_ext}", + "--log-level=WARNING", # hide configure messages below WARNING level + ] + + # Windows needs a resource compiler specified + if is_windows: + configure_cmd.append("-DCMAKE_RC_COMPILER=rc.exe") + logging.info(f"++ Configure: {shlex.join(configure_cmd)}") + subprocess.run(configure_cmd, check=True, cwd=THEROCK_DIR, env=environ_vars) + + build_cmd = ["cmake", "--build", str(build_dir)] + logging.info(f"++ Build: {shlex.join(build_cmd)}") + subprocess.run(build_cmd, check=True, cwd=THEROCK_DIR, env=environ_vars) + + test_cmd = [ + "ctest", + "--test-dir", + str(build_dir), + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "120", + ] + logging.info(f"++ Test: {shlex.join(test_cmd)}") + subprocess.run(test_cmd, check=True, cwd=THEROCK_DIR, env=environ_vars) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Test hipDNN package installation and consumption" + ) + parser.add_argument( + "--build-dir", + type=Path, + help="Build directory path (will be created if doesn't exist). 
" + "If not specified, uses temporary directory that is auto-deleted.", + ) + args = parser.parse_args() + + if not OUTPUT_ARTIFACTS_DIR: + raise RuntimeError("OUTPUT_ARTIFACTS_DIR environment variable not set") + + logging.info(f"Using OUTPUT_ARTIFACTS_DIR: {OUTPUT_ARTIFACTS_DIR}") + + if args.build_dir: + build_dir = args.build_dir.resolve() + build_dir.mkdir(parents=True, exist_ok=True) + logging.info(f"Using persistent build directory: {build_dir}") + run_tests(build_dir) + logging.info(f"Build artifacts retained in: {build_dir}") + else: + logging.info("Using temporary build directory (auto-cleanup)") + with tempfile.TemporaryDirectory() as temp_dir: # directory is removed when the with-block exits + run_tests(Path(temp_dir)) + + logging.info("All hipDNN install tests passed!") diff --git a/test/therock/test_hipdnn_samples.py b/test/therock/test_hipdnn_samples.py new file mode 100644 index 000000000000..0cbd4010c2d2 --- /dev/null +++ b/test/therock/test_hipdnn_samples.py @@ -0,0 +1,32 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/hipdnn_samples", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "60", +] +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, +) diff --git a/test/therock/test_hipfft.py b/test/therock/test_hipfft.py new file mode 100644 index 000000000000..8b8ab0a40cd8 --- /dev/null +++ b/test/therock/test_hipfft.py @@ -0,0 +1,45 @@ +# Copyright Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +# If quick tests are enabled, we run quick tests only. +# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + test_filter = ["--smoketest"] +else: + # "--test_prob" is the probability that a given test will run. + # Due to the large number of tests for hipFFT, we only run a subset. + test_filter = [ + "--gtest_filter=-*multi_gpu*", + "--test_prob", + "0.01", + ] + +cmd = [f"{THEROCK_BIN_DIR}/hipfft-test"] + test_filter +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, + env=environ_vars, +) diff --git a/test/therock/test_hiprand.py b/test/therock/test_hiprand.py new file mode 100644 index 000000000000..9c24872965cb --- /dev/null +++ b/test/therock/test_hiprand.py @@ -0,0 +1,32 @@ +# Copyright Advanced Micro Devices, Inc. 
+# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/hipRAND", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "60", +] +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, +) diff --git a/test/therock/test_hipsolver.py b/test/therock/test_hipsolver.py new file mode 100644 index 000000000000..d73509a4365d --- /dev/null +++ b/test/therock/test_hipsolver.py @@ -0,0 +1,53 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +PLATFORM = os.getenv("PLATFORM") +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. 
+environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +logging.basicConfig(level=logging.INFO) + +tests_to_exclude = [ + "*known_bug*", + "*HEEVD*float_complex*", + "*HEEVJ*float_complex*", + "*HEGVD*float_complex*", + "*HEGVJ*float_complex*", + "*HEEVDX*float_complex*", + "*SYTRF*float_complex*", + "*HEEVD*double_complex*", + "*HEEVJ*double_complex*", + "*HEGVD*double_complex*", + "*HEGVJ*double_complex*", + "*HEEVDX*double_complex*", + "*SYTRF*double_complex*", + # TODO(#2824): Re-enable test once flaky issue is resolved + "checkin_lapack/POTRF_FORTRAN.batched__float_complex/9", +] + +exclusion_list = ":".join(tests_to_exclude) + +cmd = [ + f"{THEROCK_BIN_DIR}/hipsolver-test", + f"--gtest_filter=-{exclusion_list}", +] + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_hipsparse.py b/test/therock/test_hipsparse.py new file mode 100644 index 000000000000..82b6afd5e379 --- /dev/null +++ b/test/therock/test_hipsparse.py @@ -0,0 +1,61 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path +import platform + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES") +os_type = platform.system().lower() + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. 
+environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +logging.basicConfig(level=logging.INFO) + +TEST_TO_IGNORE = { + "gfx1151": { + # TODO(#3621): Include test once out of resource errors are resolved + "windows": ["*spmm*"] + }, +} + +environ_vars["HIPSPARSE_CLIENTS_MATRICES_DIR"] = ( + f"{OUTPUT_ARTIFACTS_DIR}/clients/matrices/" +) + +cmd = [f"{THEROCK_BIN_DIR}/hipsparse-test"] + +gtest_filter = "--gtest_filter=" + +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + gtest_filter += "*spmv*:*spsv*:*spsm*:*spmm*:*csric0*:*csrilu0*:-known_bug*" +else: + gtest_filter += "*quick*:-known_bug*" + +if AMDGPU_FAMILIES in TEST_TO_IGNORE and os_type in TEST_TO_IGNORE[AMDGPU_FAMILIES]: + ignored_tests = TEST_TO_IGNORE[AMDGPU_FAMILIES][os_type] + gtest_filter += ":" + ":".join(ignored_tests) + +cmd.append(gtest_filter) + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run( + cmd, + cwd=THEROCK_DIR, + check=True, + env=environ_vars, +) diff --git a/test/therock/test_hipsparselt.py b/test/therock/test_hipsparselt.py new file mode 100644 index 000000000000..a7e77f746a93 --- /dev/null +++ b/test/therock/test_hipsparselt.py @@ -0,0 +1,38 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. 
+environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +# If quick tests are enabled, we run quick tests only. +# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") + +test_filter = [] +if test_type == "quick": + test_filter.append("--gtest_filter=*smoke*") +elif test_type == "full": + test_filter.append("--gtest_filter=*quick*") + +cmd = [f"{THEROCK_BIN_DIR}/hipsparselt-test"] + test_filter + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_miopen.py b/test/therock/test_miopen.py new file mode 100644 index 000000000000..8dfea8edd88d --- /dev/null +++ b/test/therock/test_miopen.py @@ -0,0 +1,288 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path +import platform + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES", "") +os_type = platform.system().lower() + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +# Some of our runtime kernel compilations have been relying on either ROCM_PATH being set, or ROCm being installed at +# /opt/rocm. Neither of these is true in TheRock so we need to supply ROCM_PATH to our tests. 
+ROCM_PATH = Path(THEROCK_BIN_DIR).resolve().parent +environ_vars["ROCM_PATH"] = str(ROCM_PATH) + +logging.basicConfig(level=logging.INFO) + +########################################### + +positive_filter = [] +negative_filter = [] + +# Fusion # +positive_filter.append("*Fusion*") + +# Batch Normalization # +positive_filter.append("*/GPU_BNBWD*_*") +positive_filter.append("*/GPU_BNOCLBWD*_*") +positive_filter.append("*/GPU_BNFWD*_*") +positive_filter.append("*/GPU_BNOCLFWD*_*") +positive_filter.append("*/GPU_BNInfer*_*") +positive_filter.append("*/GPU_BNActivInfer_*") +positive_filter.append("*/GPU_BNOCLInfer*_*") +positive_filter.append("*/GPU_bn_infer*_*") + +# CPU tests +positive_filter.append("CPU_*") # tests without a suite +positive_filter.append("*/CPU_*") # tests with a suite + +# Other (Cat, ConvBiasActiv) +positive_filter.append("*/GPU_Cat_*") +positive_filter.append("*/GPU_ConvBiasActiv*") + +# Convolutions +positive_filter.append("*/GPU_Conv*") +positive_filter.append("*/GPU_conv*") + +# Solvers +positive_filter.append("*/GPU_UnitTestConv*") + +# Misc + +positive_filter.append("*/GPU_GetitemBwd*") +positive_filter.append("*/GPU_GLU_*") + +positive_filter.append("*/GPU_GroupConv*") +positive_filter.append("*/GPU_GroupNorm_*") +positive_filter.append("*/GPU_GRUExtra_*") +positive_filter.append("*/GPU_TestActivation*") +positive_filter.append("*/GPU_HipBLASLtGEMMTest*") +positive_filter.append("*/GPU_KernelTuningNetTestConv*") +positive_filter.append("*/GPU_Kthvalue_*") +positive_filter.append("*/GPU_LayerNormTest*") +positive_filter.append("*/GPU_LayoutTransposeTest_*") +positive_filter.append("*/GPU_Lrn*") +positive_filter.append("*/GPU_lstm_extra*") + +positive_filter.append("*/GPU_MultiMarginLoss_*") +positive_filter.append("*/GPU_ConvNonpack*") +positive_filter.append("*/GPU_PerfConfig_HipImplicitGemm*") +positive_filter.append("*/GPU_AsymPooling2d_*") +positive_filter.append("*/GPU_WidePooling2d_*") +positive_filter.append("*/GPU_PReLU_*") 
+positive_filter.append("*/GPU_Reduce*") +positive_filter.append("*/GPU_reduce_custom_*") +positive_filter.append("*/GPU_regression_issue_*") +positive_filter.append("*/GPU_RNNExtra_*") +positive_filter.append("*/GPU_RoPE*") +positive_filter.append("*/GPU_SoftMarginLoss*") +positive_filter.append("*/GPU_T5LayerNormTest_*") +positive_filter.append("*/GPU_Op4dTensorGenericTest_*") +positive_filter.append("*/GPU_TernaryTensorOps_*") +positive_filter.append("*/GPU_unaryTensorOps_*") +positive_filter.append("*/GPU_Transformers*") +positive_filter.append("*/GPU_TunaNetTest_*") +positive_filter.append("*/GPU_UnitTestActivationDescriptor_*") +positive_filter.append("*/GPU_FinInterfaceTest*") +positive_filter.append("*/GPU_VecAddTest_*") + +positive_filter.append("*/GPU_KernelTuningNetTest*") + +positive_filter.append("*/GPU_Bwd_Mha_*") +positive_filter.append("*/GPU_Fwd_Mha_*") +positive_filter.append("*/GPU_Softmax*") +positive_filter.append("*/GPU_Dropout*") +positive_filter.append("*/GPU_MhaBackward_*") +positive_filter.append("*/GPU_MhaForward_*") +positive_filter.append("*GPU_TestMhaFind20*") +positive_filter.append("*/GPU_MIOpenDriver*") + +############################################# + +negative_filter.append("*DeepBench*") +negative_filter.append("*MIOpenTestConv*") + +# Excluded to save time in the pre-commit step (observed run time noted next to each entry) +#################################################### +negative_filter.append("Full/GPU_MIOpenDriverConv2dTransTest*") # 4 min 45 sec +negative_filter.append("Full/GPU_Reduce_FP64*") # 4 min 19 sec +negative_filter.append("Full/GPU_BNOCLFWDTrainSerialRun3D_BFP16*") # 3 min 37 sec +negative_filter.append("Full/GPU_Lrn_FP32*") # 2 min 50 sec +negative_filter.append("Full/GPU_Lrn_FP16*") # 2 min 20 sec +negative_filter.append("Full/GPU_BNOCLInferSerialRun3D_BFP16*") # 2 min 19 sec +negative_filter.append("Smoke/GPU_BNOCLFWDTrainLarge2D_BFP16*") # 1 min 55 sec +negative_filter.append("Smoke/GPU_BNOCLInferLarge2D_BFP16*") # 1 min 48 sec 
+negative_filter.append("Full/GPU_BNOCLBWDSerialRun3D_BFP16*") # 1 min 28 sec +negative_filter.append("Smoke/GPU_BNOCLBWDLarge2D_BFP16*") # 1 min 19 sec + +negative_filter.append("Full/GPU_UnitTestActivationDescriptor_FP32*") # 1 min 23 sec +negative_filter.append("Full/GPU_UnitTestActivationDescriptor_FP16*") # 1 min 0 sec + +negative_filter.append( + "Full/GPU_MIOpenDriverRegressionBigTensorTest_FP32*" +) # 0 min 59 sec + +negative_filter.append( + "Smoke/GPU_BNOCLBWDLargeFusedActivation2D_BFP16*" +) # 0 min 52 sec +negative_filter.append("Smoke/GPU_BNOCLBWDLargeFusedActivation2D_FP16*") # 0 min 49 sec + +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer_BFP16*") # 0 min 40 sec +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer_FP32*") # 0 min 38 sec +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer_FP16*") # 0 min 25 sec + +negative_filter.append("Full/GPU_ConvGrpActivInfer_BFP16*") # 0 min 42 sec +negative_filter.append("Full/GPU_ConvGrpActivInfer_FP32*") # 0 min 35 sec +negative_filter.append("Full/GPU_ConvGrpActivInfer_FP16*") # 0 min 25 sec + +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer3D_BFP16*") # 0 min 27 sec +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer3D_FP32*") # 0 min 25 sec +negative_filter.append("Full/GPU_ConvGrpBiasActivInfer3D_FP16*") # 0 min 19 sec + +negative_filter.append("Full/GPU_ConvGrpActivInfer3D_BFP16*") # 0 min 27 sec +negative_filter.append("Full/GPU_ConvGrpActivInfer3D_FP32*") # 0 min 22 sec +negative_filter.append("Full/GPU_ConvGrpActivInfer3D_FP16*") # 0 min 16 sec + +# TODO(#3202): Re-enable tests once issues are resolved +TEST_TO_IGNORE = { # looked up by the AMDGPU_FAMILIES value, then by the lowercase platform.system() name + "gfx110X-all": { + "windows": [ + # Failed on gfx1103 + "Smoke/CPU_Handle_NONE.TestHIP/with_stream_false_test_id_0", + "Full/GPU_reduce_custom_fp32_fp16_FP32.FloatTest_reduce_custom_fp32_fp16/1", + "Full/GPU_reduce_custom_fp32_fp16_FP32.FloatTest_reduce_custom_fp32_fp16/5", + "Full/GPU_reduce_custom_fp32_fp16_FP32.FloatTest_reduce_custom_fp32_fp16/9", + 
"Full/GPU_reduce_custom_fp32_fp16_FP32.FloatTest_reduce_custom_fp32_fp16/13", + "Full/GPU_reduce_custom_fp32_fp16_FP32.FloatTest_reduce_custom_fp32_fp16/17", + "Full/GPU_reduce_custom_fp32_fp16_FP16.HalfTest_reduce_custom_fp32_fp16/1", + "Full/GPU_reduce_custom_fp32_fp16_FP16.HalfTest_reduce_custom_fp32_fp16/5", + "Full/GPU_reduce_custom_fp32_fp16_FP16.HalfTest_reduce_custom_fp32_fp16/9", + "Full/GPU_reduce_custom_fp32_fp16_FP16.HalfTest_reduce_custom_fp32_fp16/13", + "Full/GPU_reduce_custom_fp32_fp16_FP16.HalfTest_reduce_custom_fp32_fp16/17", + ] + }, + "gfx1151": { + "windows": ["Full/GPU_UnitTestConvSolverGemmBwdRestBwd_FP16.GemmBwdRest/0"] + }, + "gfx950-dcgpu": {"linux": ["*DBSync*"]}, +} + +if AMDGPU_FAMILIES in TEST_TO_IGNORE and os_type in TEST_TO_IGNORE[AMDGPU_FAMILIES]: + ignored_tests = TEST_TO_IGNORE[AMDGPU_FAMILIES][os_type] + negative_filter.extend(ignored_tests) + +# Failing on Windows gfx110x (NOTE(review): the check below is not OS-specific, so these are excluded on every OS; confirm) +if any(prefix in AMDGPU_FAMILIES for prefix in ["gfx110"]): + negative_filter.append("*/GPU_MIOpenDriver*") + negative_filter.append("Smoke/CPU_Handle_NONE*") + negative_filter.append("Full/GPU_reduce_custom_fp32*") + +# Tests to be filtered for navi +# 1- Ignore gfx942 tests +# TODO: There is no FP32 wmma on Navi, remove all FP32 conv tests. 
These should already be skipped via applicability for +# CK solvers + +if any(prefix in AMDGPU_FAMILIES for prefix in ["gfx110", "gfx115", "gfx120"]): + # These are ignored in miopen + negative_filter.append( + "Smoke/GPU_BNFWDTrainLargeFusedActivation2D_FP32.BnV2LargeFWD_TrainCKfp32Activation/NCHW_BNSpatial_testBNAPIV1_Dim_2_test_id_32" + ) # Temporarily disabled until gfx1151 CI nodes have fw 31 or higher installed + negative_filter.append( + "Smoke/GPU_BNFWDTrainLarge2D_FP32.BnV2LargeFWD_TrainCKfp32/NCHW_BNSpatial_testBNAPIV2_Dim_2_test_id_64" + ) # Temporarily disabled until gfx1151 CI nodes have fw 31 or higher installed + # this could address 2 + negative_filter.append( + "*SerialRun3D*" + ) # These FP32 SerialRun3D tests use so much memory that they have a risk of timing out the machine during tests + # this could address 1 + negative_filter.append("*gfx942*") + # List of currently failing tests + negative_filter.append("*GPU_UnitTestConvSolverFFTFwd_FP32*") + negative_filter.append("*GPU_UnitTestConvSolverFFTBwd_FP32*") + negative_filter.append("*GPU_TernaryTensorOps_FP64*") + negative_filter.append("*GPU_TernaryTensorOps_FP16*") + negative_filter.append("*GPU_TernaryTensorOps_FP32*") + negative_filter.append("*GPU_Op4dTensorGenericTest_FP32*") + negative_filter.append("*GPU_UnitTestActivationDescriptor_FP16*") + negative_filter.append("*GPU_UnitTestActivationDescriptor_FP32*") + negative_filter.append("*CPU_TuningPolicy_NONE*") + negative_filter.append("*GPU_Dropout_FP32*") + negative_filter.append("*GPU_Dropout_FP16*") + + # TODO: We need to work to re-enable these + negative_filter.append( + "*GPU_GroupConv3D_BackwardData_FP16.GroupConv3D_BackwardData_half_Test*" + ) + negative_filter.append( + "*GPU_GroupConv3D_BackwardData_BFP16.GroupConv3D_BackwardData_bfloat16_Test*" + ) + negative_filter.append( + "*GPU_UnitTestConvSolverImplicitGemmGroupWrwXdlops_BFP16.ConvHipImplicitGemmGroupWrwXdlops*" + ) + + negative_filter.append("Smoke/GPU_MultiMarginLoss*") 
+ + negative_filter.append( + "*CPU_UnitTestConvSolverImplicitGemmGroupWrwXdlopsDevApplicability_FP16.ConvHipImplicitGemmGroupWrwXdlops*" + ) + + # Disable long running tests + negative_filter.append("Full/GPU_Softmax_FP32*") # 24 min + negative_filter.append("Full/GPU_Softmax_BFP16*") # 13 min + negative_filter.append("Full/GPU_Softmax_FP16*") # 11.5 min + negative_filter.append("Smoke/GPU_Reduce_FP32*") # 6.5 min + negative_filter.append("Smoke/GPU_Reduce_FP16*") # 4.5 min + +#################################################### + +# Quick test filter: used on its own as the gtest filter when TEST_TYPE is "quick" (negative_filter is not applied in that mode) +quick_filter = [ + # Batch norm FWD smoke tests + "Smoke/GPU_BNCKFWDTrainLarge2D_FP16*", + "Smoke/GPU_BNOCLFWDTrainLarge2D_FP16*", + "Smoke/GPU_BNOCLFWDTrainLarge3D_FP16*", + "Smoke/GPU_BNCKFWDTrainLarge2D_BFP16*", + "Smoke/GPU_BNOCLFWDTrainLarge2D_BFP16*", + "Smoke/GPU_BNOCLFWDTrainLarge3D_BFP16*", + # CK Grouped FWD Conv smoke tests + "Smoke/GPU_UnitTestConvSolverImplicitGemmFwdXdlops_FP16*", + "Smoke/GPU_UnitTestConvSolverImplicitGemmFwdXdlops_BFP16*", +] + +# TODO(rocm-libraries#2266): re-enable test for gfx950-dcgpu +if AMDGPU_FAMILIES != "gfx950-dcgpu": + quick_filter.append("*DBSync*") + positive_filter.append("*DBSync*") + +#################################################### + +# If quick tests are enabled, we run quick tests only. 
+# Otherwise, we run the normal test suite +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + test_filter = "--gtest_filter=" + ":".join(quick_filter) +else: + test_filter = ( + "--gtest_filter=" + ":".join(positive_filter) + "-" + ":".join(negative_filter) + ) +############################################# + +cmd = [f"{THEROCK_BIN_DIR}/miopen_gtest", test_filter] +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_rocblas.py b/test/therock/test_rocblas.py new file mode 100644 index 000000000000..cb37cbd4a9a4 --- /dev/null +++ b/test/therock/test_rocblas.py @@ -0,0 +1,48 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +import sys +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +# Importing is_asan from github_actions_api.py +sys.path.append(str(THEROCK_DIR / "build_tools" / "github_actions")) +from github_actions_api import is_asan + +# GTest sharding +SHARD_INDEX = os.getenv("SHARD_INDEX", 1) +TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1) +environ_vars = os.environ.copy() +# For display purposes in the GitHub Action UI, the shard array is 1-indexed. GTest expects a 0-based shard index, so we subtract 1. +environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1) +environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS) + +if is_asan(): + environ_vars["HSA_XNACK"] = "1" + +logging.basicConfig(level=logging.INFO) + +# If quick tests are enabled, we run quick tests only.
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Launch ``rocblas-test`` from ``THEROCK_BIN_DIR`` with GTest sharding.

Environment variables read by this script:
  THEROCK_BIN_DIR  directory holding ``rocblas-test`` and ``rocblas_smoke.yaml``
  SHARD_INDEX      1-based shard number (default 1)
  TOTAL_SHARDS     number of shards (default 1)
  TEST_TYPE        "quick" or anything else (default "full")

The child process is started with ``check=True``, so a non-zero exit status
raises ``subprocess.CalledProcessError``.
"""

import logging
import os
import shlex
import subprocess
import sys
from pathlib import Path

THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

# Importing is_asan from github_actions_api.py
sys.path.append(str(THEROCK_DIR / "build_tools" / "github_actions"))
from github_actions_api import is_asan

# GTest sharding
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
environ_vars = os.environ.copy()
# For display purposes in the GitHub Action UI, the shard array is 1-indexed.
# GTest expects a 0-based shard index, so we convert it here.
environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1)
environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS)

if is_asan():
    environ_vars["HSA_XNACK"] = "1"

logging.basicConfig(level=logging.INFO)

# If quick tests are enabled, we run quick tests only.
# Otherwise, we run the normal test suite
test_type = os.getenv("TEST_TYPE", "full")
if test_type == "quick":
    test_filter = ["--yaml", f"{THEROCK_BIN_DIR}/rocblas_smoke.yaml"]
else:
    # only running quick tests due to openBLAS issue: https://github.com/ROCm/TheRock/issues/1605
    test_filter = ["--yaml", f"{THEROCK_BIN_DIR}/rocblas_smoke.yaml"]

cmd = [f"{THEROCK_BIN_DIR}/rocblas-test"] + test_filter
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")

# Pass the prepared environment explicitly: without env= the child inherits
# os.environ unchanged and the shard / HSA_XNACK settings above are ignored.
subprocess.run(
    cmd,
    cwd=THEROCK_DIR,
    check=True,
    env=environ_vars,
)
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Launch ``rocfft-test`` from ``THEROCK_BIN_DIR`` with GTest sharding.

Environment variables read by this script:
  THEROCK_BIN_DIR  directory holding ``rocfft-test``
  SHARD_INDEX      1-based shard number (default 1)
  TOTAL_SHARDS     number of shards (default 1)
  TEST_TYPE        "quick" runs ``--smoketest``; anything else (default
                   "full") runs a sampled subset without multi_gpu tests

The child process is started with ``check=True``, so a non-zero exit status
raises ``subprocess.CalledProcessError``.
"""

import logging
import os
import shlex
import subprocess
from pathlib import Path

THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

logging.basicConfig(level=logging.INFO)

# GTest sharding
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
environ_vars = os.environ.copy()
# For display purposes in the GitHub Action UI, the shard array is 1-indexed.
# GTest expects a 0-based shard index, so we convert it here.
environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1)
environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS)

# If quick tests are enabled, we run quick tests only.
# Otherwise, we run the normal test suite
test_type = os.getenv("TEST_TYPE", "full")
if test_type == "quick":
    test_filter = ["--smoketest"]
else:
    # "--test_prob" is the probability that a given test will run.
    # Due to the large number of tests for rocFFT, we only run a subset.
    test_filter = [
        "--gtest_filter=-*multi_gpu*",
        "--test_prob",
        "0.02",
    ]

cmd = [f"{THEROCK_BIN_DIR}/rocfft-test"] + test_filter
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")

# Pass the prepared environment explicitly: without env= the child inherits
# os.environ unchanged and the GTEST_* shard settings above are ignored.
subprocess.run(
    cmd,
    cwd=THEROCK_DIR,
    check=True,
    env=environ_vars,
)
"*RadixSort/*", + "*RadixSortIntegral/*", + "*ReduceByKey*", + "*ReduceInputArrayTestsFloating", + "*ReduceInputArrayTestsIntegral/*", + "*ReducePrecisionTests/*", + "*ReduceSingleValueTestsFloating", + "*ReduceSingleValueTestsIntegral", + "*ReduceTests/*", + "*ReverseIteratorTests.*", + "*RunLengthEncode/*", + "*SecondPart/*", + "*SegmentedReduce/*", + "*SelectLargeInputFlaggedTest/*", + "*SelectTests/*", + "*ShuffleTestsFloating/*", + "*ShuffleTestsIntegral*", + "*SortBitonicTestsIntegral/*", + "*ThirdPart/*", + "*ThreadOperationTests/*", + "*ThreadTests/*", + "*TransformIteratorTests/*", + "*TransformTests/*", + "*VectorizationTests*", + "*WarpExchangeScatterTest/*", + "*WarpExchangeTest/*", + "*WarpLoadTest/*", + "*WarpReduceTestsFloating/*", + "*WarpReduceTestsIntegral/*", + "*WarpScanTests*", + "*WarpSortShuffleBasedTestsIntegral/*", + "*ceIntegral/*", + "*tyIntegral/*", + "TestHipGraphBasic", +] + +# sharding +shard_index = int(os.getenv("SHARD_INDEX", "1")) - 1 +total_shards = int(os.getenv("TOTAL_SHARDS", "1")) + + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/rocprim", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "900", + "--repeat", + "until-pass:6", + # shards the tests by running a specific set of tests based on starting test (shard_index) and stride (total_shards) + "--tests-information", + f"{shard_index},,{total_shards}", +] + +if AMDGPU_FAMILIES in TEST_TO_IGNORE and os_type in TEST_TO_IGNORE[AMDGPU_FAMILIES]: + ignored_tests = TEST_TO_IGNORE[AMDGPU_FAMILIES][os_type] + cmd.extend(["--exclude-regex", "|".join(ignored_tests)]) + +# If quick tests are enabled, we run quick tests only. 
+# Otherwise, we run the normal test suite +environ_vars = os.environ.copy() +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + environ_vars["GTEST_FILTER"] = ":".join(QUICK_TESTS) + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_rocrand.py b/test/therock/test_rocrand.py new file mode 100644 index 000000000000..83263c05cf83 --- /dev/null +++ b/test/therock/test_rocrand.py @@ -0,0 +1,107 @@ +# Copyright Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT + +import logging +import os +import shlex +import subprocess +from pathlib import Path + +THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR") +SCRIPT_DIR = Path(__file__).resolve().parent +THEROCK_DIR = SCRIPT_DIR.parent.parent.parent + +logging.basicConfig(level=logging.INFO) + +QUICK_TESTS = [ + "*basic_tests*", + "*config_dispatch_tests.*", + "*cpp_utils_tests.*", + "*cpp_wrapper*", + "*distributions/*", + "*generate_host_test/*", + "*generate_long_long_tests/*", + "*generate_normal_tests/*", + "*generate_uniform_tests/*", + "*generator_type_tests.*", + "*kernel_lfsr113*", + "*kernel_lfsr113_poisson/*", + "*kernel_mrg/*", + "*kernel_mtgp32*", + "*kernel_mtgp32_poisson/*", + "*kernel_philox4x32_10*", + "*kernel_philox4x32_10_poisson/*", + "*kernel_scrambled_sobol32*", + "*kernel_scrambled_sobol32_poisson/*", + "*kernel_scrambled_sobol64*", + "*kernel_scrambled_sobol64_poisson/*", + "*kernel_sobol32*", + "*kernel_sobol32_poisson/*", + "*kernel_sobol64*", + "*kernel_sobol64_poisson/*", + "*kernel_threefry2x32_20*", + "*kernel_threefry2x32_20_poisson/*", + "*kernel_threefry2x64_20*", + "*kernel_threefry2x64_20_poisson/*", + "*kernel_threefry4x32_20*", + "*kernel_threefry4x32_20_poisson/*", + "*kernel_threefry4x64_20*", + "*kernel_threefry4x64_20_poisson/*", + "*kernel_xorwow*", + "*kernel_xorwow_poisson/*", + "*lfsr113_engine_api_tests.*", + "*lfsr113_generator/*", + 
"*lfsr113_generator_prng_tests/*", + "*linkage_tests.*", + "*log_normal_distribution_tests.*", + "*log_normal_tests.*", + "*mrg/*", + "*mrg_generator_prng_tests.*", + "*mrg_log_normal_distribution_tests/*", + "*mrg_normal_distribution_tests/*", + "*mrg_prng_engine_tests/*", + "*mrg_uniform_distribution_tests/*", + "*mtgp32_generator/*", + "*normal_distribution_tests.*", + "*philox4x32_10_generator/*", + "*philox_prng_state_tests.*", + "*poisson_distribution_tests/*", + "*poisson_tests.*", + "*rocrand_generate_tests.*", + "*rocrand_hipgraph_generate_tests.*", + "*sobol_log_normal_distribution_tests/*", + "*sobol_normal_distribution_tests.*", + "*sobol_qrng_tests/*", + "*threefry2x32_20_generator/*", + "*threefry2x64_20_generator/*", + "*threefry4x32_20_generator/*", + "*threefry4x64_20_generator/*", + "*threefry_prng_state_tests.*", + "*xorwow_engine_type_test.*", + "*xorwow_generator/*", + "-*basic_tests/rocrand_basic_tests.rocrand_create_destroy_generator_test/10*", +] + +cmd = [ + "ctest", + "--test-dir", + f"{THEROCK_BIN_DIR}/rocRAND", + "--output-on-failure", + "--parallel", + "8", + "--timeout", + "900", + "--repeat", + "until-pass:3", +] + +# If quick tests are enabled, we run quick tests only. +# Otherwise, we run the normal test suite +environ_vars = os.environ.copy() +test_type = os.getenv("TEST_TYPE", "full") +if test_type == "quick": + environ_vars["GTEST_FILTER"] = ":".join(QUICK_TESTS) + +logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}") + +subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars) diff --git a/test/therock/test_rocroller.py b/test/therock/test_rocroller.py new file mode 100644 index 000000000000..7494532e3ca4 --- /dev/null +++ b/test/therock/test_rocroller.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +# Copyright Advanced Micro Devices, Inc. 
#!/usr/bin/env python3
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Locate and run the ``rocroller-tests`` GTest binary.

Environment variables read by this script:
  THEROCK_BIN_DIR    optional; first place to look for ``rocroller-tests``
  THEROCK_BUILD_DIR  optional; defaults to ``<THEROCK_DIR>/build``
  RUNNER_OS          optional; defaults to "linux"
  SHARD_INDEX        1-based shard number (default "1")
  TOTAL_SHARDS       number of shards (default "1")
  TEST_TYPE          "quick" selects a fixed subset; default "full"
  EXTRA_GTEST_ARGS   optional; shell-split and appended to the command

Raises FileNotFoundError when the binary is in none of the candidate
locations. The child process is started with ``check=True``, so a non-zero
exit status raises ``subprocess.CalledProcessError``.
"""

import logging
import os
import shlex
import subprocess
from pathlib import Path

logging.basicConfig(level=logging.INFO, format="%(message)s")

# repo + dirs
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent
THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR", "")
platform = os.getenv("RUNNER_OS", "linux").lower()

# Sharding: SHARD_INDEX arrives 1-based, GTest wants it 0-based.
env = os.environ.copy()
env["GTEST_SHARD_INDEX"] = str(int(os.getenv("SHARD_INDEX", "1")) - 1)
env["GTEST_TOTAL_SHARDS"] = str(int(os.getenv("TOTAL_SHARDS", "1")))

# Decide test binary location:
# 1) If CI staged into THEROCK_BIN_DIR, expect "rocroller-tests" there.
# 2) Else use superbuild path.
bin_candidates = []
if THEROCK_BIN_DIR:
    bin_candidates.append(Path(THEROCK_BIN_DIR) / "rocroller-tests")

BUILD_DIR = Path(os.getenv("THEROCK_BUILD_DIR", THEROCK_DIR / "build"))
bin_candidates.append(
    BUILD_DIR
    / "math-libs"
    / "BLAS"
    / "rocRoller"
    / "build"
    / "test"
    / "rocroller-tests"
)

test_bin = next((p for p in bin_candidates if p.is_file()), None)
if not test_bin:
    raise FileNotFoundError(
        f"rocroller-tests not found in: {', '.join(map(str, bin_candidates))}"
    )

# Runtime libs
if platform == "linux":
    THEROCK_DIST_DIR = BUILD_DIR / "core" / "clr" / "dist"
    llvm_libdir = THEROCK_DIST_DIR / "lib" / "llvm" / "lib"  # libomp.so
    ld_parts = [
        str(THEROCK_DIST_DIR / "lib"),
        str(THEROCK_DIST_DIR / "lib64"),
        str(llvm_libdir),
        # superbuild libs if running from the build tree:
        str(test_bin.parent.parent),  # .../rocRoller/build
        str(BUILD_DIR / "math-libs" / "BLAS" / "rocRoller" / "stage" / "lib"),
        str(BUILD_DIR / "math-libs" / "BLAS" / "rocRoller" / "dist" / "lib"),
    ]
    # De-dupe while preserving order (dict keys keep insertion order).
    ld_clean = list(dict.fromkeys(p for p in ld_parts if p))
    # Export the search path. Previously the de-duplicated list was computed
    # and then discarded, so none of these directories reached the child.
    # Any inherited LD_LIBRARY_PATH is kept, after our entries.
    inherited_ld_path = env.get("LD_LIBRARY_PATH")
    if inherited_ld_path:
        ld_clean.append(inherited_ld_path)
    env["LD_LIBRARY_PATH"] = os.pathsep.join(ld_clean)
    env["ROCM_PATH"] = str(THEROCK_DIST_DIR)
    env["HIP_PATH"] = str(THEROCK_DIST_DIR)

# TEST_TYPE → gtest filter
TEST_TYPE = os.getenv("TEST_TYPE", "full").lower()
positive_patterns = ""
if TEST_TYPE == "quick":
    # keep this subset (TODO: add more tests)
    quick_tests = [
        "ErrorFixtureDeathTest.*",
        "ArgumentLoaderTest.*",
        "AssemblerTest.*",
        "ControlGraphTest.*",
        "CommandTest.*",
        "ComponentTest.*",
    ]
    positive_patterns = ":".join(quick_tests)
# NOTE: a second `elif TEST_TYPE == "quick"` branch (filter "*quick*") used to
# follow here. It repeated the condition above and could never run, so it was
# removed; the selected tests are unchanged.

# Append to the existing filter or start a negative-only filter
# TODO(#2030): re-enable these tests once compatible with TheRock
# https://github.com/ROCm/TheRock/issues/2030
_excluded = [
    "AssertTest/GPU_AssertTest.GPU_Assert/28",
    "AssertTest/GPU_AssertTest.GPU_UnconditionalAssert/28",
    "AssertTest/GPU_AssertTest.GPU_Assert/29",
    "AssertTest/GPU_AssertTest.GPU_UnconditionalAssert/29",
    "GPU_KernelTests/GPU_KernelTest.GPU_WholeKernel/1",
]
_exclude_str = ":".join(_excluded)
# "<positive>-<negative>"; an empty positive part yields a negative-only filter.
test_filter_arg = f"--gtest_filter={positive_patterns}-{_exclude_str}"

cmd = [str(test_bin), test_filter_arg]

extra = os.getenv("EXTRA_GTEST_ARGS", "")
if extra:
    cmd += shlex.split(extra)

logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")
subprocess.run(cmd, cwd=str(THEROCK_DIR), check=True, env=env)
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Launch ``rocsolver-test`` from ``THEROCK_BIN_DIR`` with GTest sharding.

Environment variables read by this script:
  THEROCK_BIN_DIR       directory holding ``rocsolver-test``
  OUTPUT_ARTIFACTS_DIR  read but not used below
  SHARD_INDEX           1-based shard number (default 1)
  TOTAL_SHARDS          number of shards (default 1)
  TEST_TYPE             "quick" or anything else (default "full")
"""

import logging
import os
import shlex
import subprocess
from pathlib import Path

THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
OUTPUT_ARTIFACTS_DIR = os.getenv("OUTPUT_ARTIFACTS_DIR")
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

logging.basicConfig(level=logging.INFO)

# GTest sharding
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
# For display purposes in the GitHub Action UI, the shard array is 1-indexed.
# GTest expects a 0-based shard index, so we convert it here.
environ_vars = os.environ.copy()
environ_vars.update(
    GTEST_SHARD_INDEX=str(int(SHARD_INDEX) - 1),
    GTEST_TOTAL_SHARDS=str(TOTAL_SHARDS),
)

# Test filter patterns retrieved from https://github.com/ROCm/rocm-libraries/blob/a18b17eef6c24bcd4bcf8dd6a0e36325cbcd11a7/projects/rocsolver/rtest.xml
QUICK_PATTERNS = (
    "checkin*BDSQR*",
    "checkin*STEBZ*",
    "checkin*STEIN*",
    "checkin*STERF*",
    "checkin*STEQR*",
    "checkin*SYEVJ*",
    "checkin*HEEVJ*",
    "checkin*LARFG*",
    "checkin*LARF*",
    "checkin*LARFT*",
    "checkin*GETF2*",
    "checkin*POTF2*",
    "checkin*GEQR2*",
    "checkin*GELQ2*",
    "checkin*SPLITLU*",
    "checkin*REFACTLU*",
    "checkin*REFACTCHOL*",
)

# If quick tests are enabled, we run quick tests only.
# Otherwise, we run the normal test suite
if os.getenv("TEST_TYPE", "full") == "quick":
    selected = ":".join(QUICK_PATTERNS)
    gtest_filter = f"--gtest_filter={selected}-*LARFB*:*known_bug*"
else:
    gtest_filter = (
        "--gtest_filter=checkin*-*known_bug*:checkin_lapack/SYGVDX_INPLACE.__float/41"
    )

cmd = [f"{THEROCK_BIN_DIR}/rocsolver-test", gtest_filter]

logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")
subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars)
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Launch ``rocsparse-test`` from ``THEROCK_BIN_DIR`` with GTest sharding.

Environment variables read by this script:
  THEROCK_BIN_DIR       directory holding ``rocsparse-test`` (must be set)
  OUTPUT_ARTIFACTS_DIR  root containing ``clients/matrices/`` (must be set)
  SHARD_INDEX           1-based shard number (default 1)
  TOTAL_SHARDS          number of shards (default 1)
  TEST_TYPE             "quick" or anything else (default "full")
"""

import logging
import os
import shlex
import subprocess
from pathlib import Path

# Path(None) raises TypeError, so both variables have to be present.
THEROCK_BIN_DIR = Path(os.getenv("THEROCK_BIN_DIR")).resolve()
OUTPUT_ARTIFACTS_DIR = Path(os.getenv("OUTPUT_ARTIFACTS_DIR")).resolve()
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent.resolve()

logging.basicConfig(level=logging.INFO)

# GTest sharding
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
# For display purposes in the GitHub Action UI, the shard array is 1-indexed.
# GTest expects a 0-based shard index, so we convert it here.
environ_vars = os.environ.copy()
environ_vars.update(
    GTEST_SHARD_INDEX=str(int(SHARD_INDEX) - 1),
    GTEST_TOTAL_SHARDS=str(TOTAL_SHARDS),
)

smoke_yaml = f"{THEROCK_DIR}/build/share/rocsparse/test/rocsparse_smoke.yaml"

# If quick tests are enabled, we run quick tests only.
# Otherwise, we run the normal test suite
if os.getenv("TEST_TYPE", "full") == "quick":
    test_filter = ["--yaml", smoke_yaml]
else:
    # TODO(#2616): Enable full tests once known test issues are resolved
    test_filter = ["--yaml", smoke_yaml]

cmd = [
    f"{THEROCK_BIN_DIR}/rocsparse-test",
    "--matrices-dir",
    f"{OUTPUT_ARTIFACTS_DIR}/clients/matrices/",
    *test_filter,
]
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")
subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars)
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Generate a ctest resource spec and run the rocThrust ctest suite.

Environment variables read by this script:
  THEROCK_BIN_DIR  ``bin`` directory of the install (must be set; its
                   parent is used as ROCM_PATH for the generator step)
  AMDGPU_FAMILIES  selects the ctest parallelism
  TEST_TYPE        "quick" exports GTEST_FILTER built from QUICK_TESTS
"""

import logging
import os
import platform
import shlex
import subprocess
from pathlib import Path

THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

logging.basicConfig(level=logging.INFO)

# GTest patterns that make up the "quick" subset.
QUICK_TESTS = [
    "AllocatorTests.*",
    "AsyncExclusiveScan*",
    "AsyncInclusiveScan*",
    "AsyncReduce*",
    "AsyncSort*",
    "AsyncTransform*",
    "AsyncTriviallyRelocatableElements*",
    "ConstantIteratorTests.*",
    "Copy*",
    "CopyN*",
    "Count*",
    "CountingIteratorTests.*",
    "Dereference*",
    "DeviceDelete*",
    "DevicePathSimpleTest",
    "DevicePtrTests.*",
    "DeviceReferenceTests.*",
    "DiscardIteratorTests.*",
    "EqualTests.*",
    "Fill*",
    "Find*",
    "ForEach*",
    "Gather*",
    "Generate*",
    "InnerProduct*",
    "IsPartitioned*",
    "IsSorted*",
    "IsSortedUntil*",
    "MemoryTests.*",
    "Merge*",
    "MergeByKey*",
    "Mr*Tests.*",
    "Partition*",
    "PartitionPoint*",
    "PermutationIteratorTests.*",
    "RandomTests.*",
    "Reduce*",
    "ReduceByKey*",
    "Remove*",
    "RemoveIf*",
    "Replace*",
    "ReverseIterator*",
    "Scan*",
    "ScanByKey*",
    "Scatter*",
    "Sequence*",
    "SetDifference*",
    "SetIntersection*",
    "SetSymmetricDifference*",
    "Shuffle*",
    "Sort*",
    "StableSort*",
    "StableSortByKey*",
    "Tabulate*",
    "TestBijectionLength",
    "TestHipThrustCopy.DeviceToDevice",
    "Transform*",
    "TransformIteratorTests.*",
    "TransformReduce*",
    "TransformScan*",
    "UninitializedCopy*",
    "UninitializedFill*",
    "Unique*",
    "Vector*",
    "VectorAllocatorTests.*",
    "ZipIterator*",
]

# Some platforms are less capable than others.
ctest_parallel_count = 4 if AMDGPU_FAMILIES in ("gfx1152", "gfx1153") else 8

# Generate the resource spec file for ctest
rocm_base = Path(THEROCK_BIN_DIR).resolve().parent
ld_paths = [
    rocm_base / "lib",
]
ld_paths_str = os.pathsep.join(map(str, ld_paths))

# Environment for the generator step: THEROCK_BIN_DIR goes in front of PATH,
# the install's lib dir in front of LD_LIBRARY_PATH, and ROCM_PATH points at
# the install root.
env_vars = os.environ.copy()
existing_path = os.environ.get("PATH", "")
if existing_path:
    env_vars["PATH"] = f"{THEROCK_BIN_DIR}{os.pathsep}{existing_path}"
else:
    env_vars["PATH"] = THEROCK_BIN_DIR
env_vars["ROCM_PATH"] = str(rocm_base)
existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
if existing_ld_path:
    env_vars["LD_LIBRARY_PATH"] = f"{ld_paths_str}{os.pathsep}{existing_ld_path}"
else:
    env_vars["LD_LIBRARY_PATH"] = ld_paths_str

is_windows = platform.system() == "Windows"
exe_name = "generate_resource_spec.exe" if is_windows else "generate_resource_spec"
exe_dir = rocm_base / "bin" / "rocthrust"

resource_spec_file = "resources.json"
res_gen_cmd = [str(exe_dir / exe_name), str(exe_dir / resource_spec_file)]
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(res_gen_cmd)}")
subprocess.run(res_gen_cmd, cwd=THEROCK_DIR, check=True, env=env_vars)

# Run ctest with resource spec file
cmd = ["ctest", "--test-dir", f"{THEROCK_BIN_DIR}/rocthrust", "--output-on-failure"]
cmd += ["--parallel", str(ctest_parallel_count)]
cmd += ["--resource-spec-file", resource_spec_file]
cmd += ["--timeout", "300"]

# Quick runs restrict GTest through GTEST_FILTER; any other TEST_TYPE leaves
# the environment untouched so the normal suite runs.
# NOTE(review): ctest gets a fresh copy of os.environ here, not the augmented
# env_vars used for the generator step above — confirm this is intentional.
environ_vars = os.environ.copy()
if os.getenv("TEST_TYPE", "full") == "quick":
    environ_vars["GTEST_FILTER"] = ":".join(QUICK_TESTS)

logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")

subprocess.run(cmd, cwd=THEROCK_DIR, check=True, env=environ_vars)
# Copyright Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Run the rocWMMA ctest suite located under ``THEROCK_BIN_DIR``/rocwmma.

Environment variables read by this script:
  THEROCK_BIN_DIR  ``bin`` directory of the install (must be set; its
                   parent is exported to the tests as ROCM_PATH)
  AMDGPU_FAMILIES  selects the ctest parallelism
  RUNNER_OS        optional; defaults to "linux"
  SHARD_INDEX      1-based shard number (default 1)
  TOTAL_SHARDS     number of shards (default 1)
  TEST_TYPE        "quick" / "regression" run the ``regression`` subdirectory
                   with a 720 s timeout; anything else (default "full") runs
                   the whole tree with a 3600 s timeout
"""

import logging
import os
import shlex
import subprocess
from pathlib import Path

THEROCK_BIN_DIR = os.getenv("THEROCK_BIN_DIR")
AMDGPU_FAMILIES = os.getenv("AMDGPU_FAMILIES")
# Default to "linux" when RUNNER_OS is unset: calling .lower() on None used to
# abort the script outside CI. The value is not consulted further below.
platform = os.getenv("RUNNER_OS", "linux").lower()
SCRIPT_DIR = Path(__file__).resolve().parent
THEROCK_DIR = SCRIPT_DIR.parent.parent.parent

# GTest sharding
SHARD_INDEX = os.getenv("SHARD_INDEX", 1)
TOTAL_SHARDS = os.getenv("TOTAL_SHARDS", 1)
environ_vars = os.environ.copy()
# For display purposes in the GitHub Action UI, the shard array is 1-indexed.
# GTest expects a 0-based shard index, so we convert it here.
environ_vars["GTEST_SHARD_INDEX"] = str(int(SHARD_INDEX) - 1)
environ_vars["GTEST_TOTAL_SHARDS"] = str(TOTAL_SHARDS)

# Enable GTest "brief" output: only show failures and the final results
environ_vars["GTEST_BRIEF"] = str(1)

# Some of our runtime kernel compilations have been relying on either ROCM_PATH being set, or ROCm being installed at
# /opt/rocm. Neither of these is true in TheRock so we need to supply ROCM_PATH to our tests.
ROCM_PATH = Path(THEROCK_BIN_DIR).resolve().parent
environ_vars["ROCM_PATH"] = str(ROCM_PATH)

logging.basicConfig(level=logging.INFO)

# If quick tests are enabled, we run quick tests only.
# Otherwise, we run the normal test suite
test_type = os.getenv("TEST_TYPE", "full")

# TODO(#2823): Re-enable test once flaky issue is resolved
TESTS_TO_IGNORE = ["unpack_util_test"]

if test_type in ("quick", "regression"):
    # The emulator regression tests are very fast.
    # If we need something even faster we can use "/smoke" here.
    test_subdir = "/regression"
    timeout = "720"
else:
    test_subdir = ""
    timeout = "3600"

# Make per-device adjustments
ctest_parallelism = "2"
if AMDGPU_FAMILIES == "gfx1153":
    ctest_parallelism = "1"

cmd = [
    "ctest",
    "--test-dir",
    f"{THEROCK_BIN_DIR}/rocwmma{test_subdir}",
    "--output-on-failure",
    "--parallel",
    ctest_parallelism,
    "--timeout",
    timeout,
    "--exclude-regex",
    "|".join(TESTS_TO_IGNORE),
]
logging.info(f"++ Exec [{THEROCK_DIR}]$ {shlex.join(cmd)}")

subprocess.run(
    cmd,
    cwd=THEROCK_DIR,
    check=True,
    env=environ_vars,
)