Skip to content

Commit b6717d5

Browse files
committed
dev-util/Tensile: fix compilation of sci-libs/rocBLAS on gfx906
Clang-20 disallowed op_sel in some VOP3P dot instructions. See: llvm/llvm-project#100485. As ROCm maintains a fork of Clang, these changes did not reach official ROCm releases. However, Gentoo uses the original Clang-20, which has these incompatible changes. Luckily, in Tensile these op_sel modifiers do nothing. Generally, they allow shuffling vector elements before multiplication, but with values 0,0/1,1 shuffling is disabled and op_sel can be removed. Closes: https://bugs.gentoo.org/949817 Signed-off-by: Sv. Lockal <[email protected]>
1 parent 3991ad9 commit b6717d5

File tree

2 files changed

+147
-1
lines changed

2 files changed

+147
-1
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# Copyright 1999-2025 Gentoo Authors
2+
# Distributed under the terms of the GNU General Public License v2
3+
4+
# Ebuild API level this ebuild is written against.
EAPI=8
5+
6+
# Accepted Python implementations: python3_10 through python3_13.
PYTHON_COMPAT=( python3_{10..13} )
7+
# Read by the distutils-r1 eclass; presumably selects setuptools as the
# PEP 517 build backend -- confirm against the eclass documentation.
DISTUTILS_USE_PEP517=setuptools
8+
# The ROCm version is taken from this package's own version.
ROCM_VERSION=${PV}
9+
# Only LLVM slot 20 is listed as compatible.
LLVM_COMPAT=( 20 )
10+
11+
# The variables above are set before the eclasses that consume them are
# inherited.
inherit cmake distutils-r1 llvm-r1 prefix rocm
12+
13+
# Basic package metadata.
DESCRIPTION="Stretching GPU performance for GEMMs and tensor contractions"
14+
HOMEPAGE="https://github.com/ROCm/Tensile"
15+
# The upstream tag archive is saved under a package-specific file name.
SRC_URI="https://github.com/ROCm/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz"
16+
# Source directory inside WORKDIR, named after the upstream "rocm-<version>" tag.
S="${WORKDIR}/${PN}-rocm-${PV}"
17+
18+
LICENSE="MIT"
19+
# The subslot is built from the first two version components (ver_cut 1-2).
SLOT="0/$(ver_cut 1-2)"
20+
KEYWORDS="~amd64"
21+
IUSE="client test"
22+
# Enabling the client pulls in the ROCm eclass USE constraints.
REQUIRED_USE="client? ( ${ROCM_REQUIRED_USE} )"
23+
24+
# tests can freeze machine depending on gpu/kernel
25+
RESTRICT="test"
26+
27+
# Runtime dependencies: the Python interpreter and Python modules, msgpack-cxx,
# HIP and rocm-smi restricted with this package's SLOT value, and clang for
# the LLVM slot generated by llvm_gen_dep. Boost is only needed with
# USE=client.
RDEPEND="${PYTHON_DEPS}
28+
client? ( dev-libs/boost )
29+
>=dev-cpp/msgpack-cxx-6.0.0
30+
dev-python/pyyaml[${PYTHON_USEDEP}]
31+
dev-python/msgpack[${PYTHON_USEDEP}]
32+
dev-python/joblib[${PYTHON_USEDEP}]
33+
dev-util/hip:${SLOT}
34+
dev-util/rocm-smi:${SLOT}
35+
$(llvm_gen_dep '
36+
llvm-core/clang:${LLVM_SLOT}
37+
')
38+
"
39+
# Build-time dependencies are identical to the runtime set.
DEPEND="${RDEPEND}"
40+
# Extra Python packages required only when USE=test is enabled.
BDEPEND="
41+
test? (
42+
dev-python/pytest-forked[${PYTHON_USEDEP}]
43+
dev-python/pytest-xdist[${PYTHON_USEDEP}]
44+
dev-python/filelock[${PYTHON_USEDEP}]
45+
dev-python/joblib[${PYTHON_USEDEP}]
46+
)
47+
"
48+
49+
# Use pytest as the test runner for the distutils-r1 test phase.
distutils_enable_tests pytest
50+
51+
# Patches shipped in FILESDIR. The version in each file name is the patch's
# own and differs from this ebuild's version.
PATCHES=(
52+
"${FILESDIR}"/${PN}-5.4.2-fix-arch-parse.patch
53+
"${FILESDIR}"/${PN}-6.3.0-use-ninja.patch
54+
"${FILESDIR}"/${PN}-6.1.1-fix-msgpack-dependency.patch
55+
"${FILESDIR}"/${PN}-6.0.2-expand-isa-compatibility.patch
56+
)
57+
58+
# The CMake project lives in the Source subdirectory of the Tensile package
# directory rather than at the top of ${S}.
CMAKE_USE_DIR="${S}/${PN}/Source"
59+
60+
# Prepare phase.
#
# Runs the distutils-r1 prepare step, instantiates and applies the LLVM-path
# patch, then rewrites hard-coded paths and versions in the Tensile Python
# and CMake files. With USE=client the cmake prepare step runs at the end.
#
# Globals read: FILESDIR, S, PN, EPREFIX.
# Expects the current directory to be ${S} (relative paths are used).
src_prepare() {
	distutils-r1_src_prepare

	# Fill in the @LLVM_PATH@ placeholder of the template patch and apply
	# the file returned by prefixify_ro. The result is quoted so that a
	# path containing whitespace stays a single argument.
	sed -e "s,\@LLVM_PATH\@,$(get_llvm_prefix),g" \
		"${FILESDIR}"/${PN}-5.7.1-gentoopath.patch > "${S}"/gentoopath.patch || die
	eapply "$(prefixify_ro "${S}"/gentoopath.patch)"

	pushd ${PN} || die

	sed -e "/ROCM_SMI_ROOT/s,lib,$(get_libdir)," \
		-i Source/cmake/FindROCmSMI.cmake || die
	sed -r -e "/TENSILE_USE_LLVM/s/ON/OFF/" \
		-i Source/CMakeLists.txt || die

	# ${Tensile_ROOT}/bin does not exist; call command directly
	sed -e "s,\${Tensile_ROOT}/bin/,,g" -i cmake/TensileConfig.cmake || die

	local Tensile_share_dir="\"${EPREFIX}/usr/share/${PN}\""

	# Query the HIP version separately so that a failing hipconfig aborts
	# the build instead of silently substituting an empty version string.
	local hip_version
	hip_version=$(hipconfig -v) || die "hipconfig -v failed"
	# The dots are escaped so only the literal placeholder 0.0.0 matches.
	sed -e "/HipClangVersion/s/0\.0\.0/${hip_version}/" -i Common.py || die

	sed -e "s,os.path.dirname(os.path.realpath(__file__)),${Tensile_share_dir},g" \
		-i ReplacementKernels.py Common.py ${PN}.py || die

	sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile/Source\", end='')|" -i __init__.py || die

	# bug 949817: fix v_dot4_i32_i8 syntax for clang-20
	sed -e 's/ op_sel:\[0,0\] op_sel_hi:\[1,1\]//' -i Components/MAC_I8X4.py || die

	popd || die

	sed -e "/package_data/d" -e "/data_files/d" -i setup.py || die

	# PATCHES is blanked for this one call so that cmake_src_prepare does
	# not apply the patches a second time. An explicit `if` keeps the
	# function's exit status at 0 when the client is disabled.
	if use client; then
		PATCHES='' cmake_src_prepare
	fi
}
92+
93+
# Configure phase.
#
# Selects hipcc through the rocm eclass helper and configures the Python
# package. The CMake client is configured only when USE=client is enabled.
src_configure() {
	rocm_use_hipcc
	distutils-r1_src_configure

	# Without the client there is nothing for CMake to configure.
	use client || return 0

	local mycmakeargs=(
		-DCMAKE_SKIP_RPATH=ON
		-DTENSILE_USE_MSGPACK=ON
		-DTENSILE_USE_LLVM=ON
		-DTensile_LIBRARY_FORMAT=msgpack
		-DAMDGPU_TARGETS="$(get_amdgpu_flags)"
	)
	cmake_src_configure
}
108+
109+
# Compile phase.
#
# Always builds the Python package. The CMake client is built only when
# USE=client is enabled. An explicit `if` is used (matching src_configure)
# so that a disabled client does not turn into a non-zero exit status.
src_compile() {
	distutils-r1_src_compile

	if use client; then
		cmake_src_compile
	fi
}
113+
114+
# Per-implementation install step.
#
# Runs the default distutils-r1 install, then installs the Components,
# Utilities and TensileCreateLib entries from the Tensile source directory
# into the "Tensile" module location. The directory change is undone before
# returning so the caller's working directory and directory stack are left
# untouched.
python_install() {
	distutils-r1_python_install

	python_moduleinto Tensile
	pushd Tensile || die
	python_domodule Components
	python_domodule Utilities
	python_domodule TensileCreateLib
	popd || die
}
123+
124+
# Install phase.
#
# Runs the distutils-r1 install, then installs the Configs, Perf, Source and
# CustomKernels directories under /usr/share/${PN} and the CMake files under
# /usr/<libdir>/cmake/${PN}. With USE=client the tensile_client binary from
# the build directory is installed as well. Every pushd is paired with a popd
# so the working directory and directory stack are restored on return.
#
# Globals read: PN, BUILD_DIR.
src_install() {
	distutils-r1_src_install

	pushd "${PN}" || die
	insinto /usr/share/${PN}
	doins -r Configs Perf Source CustomKernels
	insinto /usr/$(get_libdir)/cmake/${PN}
	doins cmake/*.cmake
	popd || die

	if use client; then
		pushd "${BUILD_DIR}" || die
		dobin client/tensile_client
		popd || die
	fi
}
138+
139+
# Test suite fails to start without this
140+
# Per-implementation test step: export ROCM_PATH, pointing at the /usr
# directory below EPREFIX, and then hand over to epytest.
python_test() {
	ROCM_PATH="${EPREFIX}/usr"
	export ROCM_PATH

	epytest
}

dev-util/Tensile/Tensile-6.4.1.ebuild

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,13 @@ src_prepare() {
8181

8282
sed -e "s|os\.path\.dirname.*$|\"${EPREFIX}/usr/share/Tensile/Source\", end='')|" -i __init__.py || die
8383

84+
# bug 949817: fix v_dot4_i32_i8 syntax for clang-20
85+
sed 's/ op_sel:\[0,0\] op_sel_hi:\[1,1\]//' -i Components/MAC_I8X4.py || die
86+
8487
popd || die
8588

8689
sed -e "/package_data/d" -e "/data_files/d" -i setup.py || die
87-
use client && PATCHES= cmake_src_prepare # do not apply patches again in cmake_src_prepare
90+
use client && PATCHES='' cmake_src_prepare # do not apply patches again in cmake_src_prepare
8891
}
8992

9093
src_configure() {

0 commit comments

Comments
 (0)