|
| 1 | +# Copyright 1999-2025 Gentoo Authors |
| 2 | +# Distributed under the terms of the GNU General Public License v2 |
| 3 | + |
EAPI=8

# Settings declared ahead of the inherit line below.
PYTHON_COMPAT=( python3_{10..13} )
DISTUTILS_USE_PEP517=setuptools
ROCM_VERSION=${PV}
LLVM_COMPAT=( 20 )

inherit cmake distutils-r1 llvm-r1 prefix rocm

DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
HOMEPAGE="https://github.com/ROCm/Tensile"
SRC_URI="https://github.com/ROCm/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
S="${WORKDIR}/${PN}-rocm-${PV}"

LICENSE="MIT"
# The subslot follows the first two version components.
SLOT="0/$(ver_cut 1-2)"
KEYWORDS="~amd64"
IUSE="client test"
REQUIRED_USE="client? ( ${ROCM_REQUIRED_USE} )"

# tests can freeze machine depending on gpu/kernel
RESTRICT="test"

# boost is only needed for the client; hip and rocm-smi are requested in
# this package's own slot.
RDEPEND="${PYTHON_DEPS}
	client? ( dev-libs/boost )
	>=dev-cpp/msgpack-cxx-6.0.0
	dev-python/pyyaml[${PYTHON_USEDEP}]
	dev-python/msgpack[${PYTHON_USEDEP}]
	dev-python/joblib[${PYTHON_USEDEP}]
	dev-util/hip:${SLOT}
	dev-util/rocm-smi:${SLOT}
	$(llvm_gen_dep '
		llvm-core/clang:${LLVM_SLOT}
	')
"
DEPEND="${RDEPEND}"
BDEPEND="
	test? (
		dev-python/pytest-forked[${PYTHON_USEDEP}]
		dev-python/pytest-xdist[${PYTHON_USEDEP}]
		dev-python/filelock[${PYTHON_USEDEP}]
		dev-python/joblib[${PYTHON_USEDEP}]
	)
"

distutils_enable_tests pytest

PATCHES=(
	"${FILESDIR}"/${PN}-5.4.2-fix-arch-parse.patch
	"${FILESDIR}"/${PN}-6.3.0-use-ninja.patch
	"${FILESDIR}"/${PN}-6.1.1-fix-msgpack-dependency.patch
	"${FILESDIR}"/${PN}-6.0.2-expand-isa-compatibility.patch
)

# The CMake project sits in ${PN}/Source, not at the top of the source tree.
CMAKE_USE_DIR="${S}/${PN}/Source"
| 59 | + |
# Prepare the unpacked sources: run the distutils preparation, apply the
# path patch generated from a template, rewrite paths and defaults inside
# the ${PN} package directory, and (USE=client only) run the CMake
# preparation.
src_prepare() {
	distutils-r1_src_prepare

	# The gentoopath patch ships with an @LLVM_PATH@ placeholder. Fill it
	# in, then pass the result through prefixify_ro before applying it.
	# The substitution is quoted so the returned path is never word-split.
	sed -e "s,\@LLVM_PATH\@,$(get_llvm_prefix),g" \
		"${FILESDIR}"/${PN}-5.7.1-gentoopath.patch > "${S}"/gentoopath.patch || die
	eapply "$(prefixify_ro "${S}"/gentoopath.patch)"

	# Resolve the HIP version up front: an empty answer would otherwise be
	# spliced silently into the sed expression below.
	local hip_version
	hip_version=$(hipconfig -v)
	[[ -n ${hip_version} ]] || die "hipconfig -v returned no version"

	# Install location of the data files, used for both path rewrites below.
	local share_dir="${EPREFIX}/usr/share/${PN}"

	pushd ${PN} || die

	sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
		-i Source/cmake/FindROCmSMI.cmake || die
	# Flip ON to OFF on the TENSILE_USE_LLVM line; src_configure passes
	# -DTENSILE_USE_LLVM=ON explicitly for the client build.
	sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
		-i Source/CMakeLists.txt || die

	# ${Tensile_ROOT}/bin does not exist; call the command directly
	sed -e "s,\${Tensile_ROOT}/bin/,,g" -i cmake/TensileConfig.cmake || die

	# Replace the 0.0.0 placeholder on the HipClangVersion line.
	sed -e "/HipClangVersion/s/0\.0\.0/${hip_version}/" -i Common.py || die

	# Replace the "directory of this file" lookups with the share directory.
	sed -e "s,os.path.dirname(os.path.realpath(__file__)),\"${share_dir}\",g" \
		-i ReplacementKernels.py Common.py ${PN}.py || die

	sed -e "s|os\.path\.dirname.*$|\"${share_dir}/Source\", end='')|" -i __init__.py || die

	# bug 949817: fix v_dot4_i32_i8 syntax for clang-20
	sed 's/ op_sel:\[0,0\] op_sel_hi:\[1,1\]//' -i Components/MAC_I8X4.py || die

	popd || die

	# Drop the package_data / data_files lines from setup.py.
	sed -e "/package_data/d" -e "/data_files/d" -i setup.py || die

	# An explicit if keeps the phase's exit status at zero when USE=client
	# is off.
	if use client; then
		# do not apply patches again in cmake_src_prepare
		PATCHES='' cmake_src_prepare
	fi
}
| 92 | + |
# Configure the Python build, and the CMake client build when USE=client.
src_configure() {
	rocm_use_hipcc
	distutils-r1_src_configure

	# Everything below concerns the optional client only.
	use client || return 0

	local mycmakeargs=(
		-DCMAKE_SKIP_RPATH=ON
		-DTENSILE_USE_MSGPACK=ON
		-DTENSILE_USE_LLVM=ON
		-DTensile_LIBRARY_FORMAT=msgpack
		-DAMDGPU_TARGETS="$(get_amdgpu_flags)"
	)
	cmake_src_configure
}
| 108 | + |
# Build the Python package, and the CMake client when USE=client.
src_compile() {
	distutils-r1_src_compile

	# An if block (rather than a trailing `use client && ...`) keeps the
	# function's exit status at zero when the flag is disabled.
	if use client; then
		cmake_src_compile
	fi
}
| 113 | + |
# Per-implementation install: run the regular distutils install, then add
# the Components, Utilities and TensileCreateLib directories under the
# Tensile module directory.
python_install() {
	distutils-r1_python_install

	python_moduleinto Tensile
	pushd Tensile || die
	python_domodule Components Utilities TensileCreateLib
	# Restore the working directory so it does not leak into whatever runs
	# after this function.
	popd || die
}
| 123 | + |
# Install the Python package, the data directories under
# /usr/share/${PN}, the CMake files under the libdir, and (USE=client only)
# the tensile_client binary.
src_install() {
	distutils-r1_src_install

	pushd ${PN} || die
	insinto /usr/share/${PN}
	doins -r Configs Perf Source CustomKernels
	insinto /usr/$(get_libdir)/cmake/${PN}
	doins cmake/*.cmake
	# Balance the pushd above so the working directory is restored.
	popd || die

	if use client; then
		# Address the binary by path rather than changing into BUILD_DIR.
		dobin "${BUILD_DIR}"/client/tensile_client
	fi
}
| 138 | + |
# The test suite fails to start unless ROCM_PATH is exported.
python_test() {
	ROCM_PATH="${EPREFIX}/usr"
	export ROCM_PATH
	epytest
}
0 commit comments