Skip to content

Commit bef95b3

Browse files
committed
Fix DEB10 wheel build and verification
- Link amdsminic static library into libamd_smi_python.so to resolve undefined NIC symbols
- Prevent pip-context wrapper from falling back to system libamd_smi.so to avoid dual-loading segfault
- Use RTLD_LOCAL instead of RTLD_GLOBAL when loading .so via ctypes
- Make wheel verification fatal in CI
- Add trailing newline to setup.cfg.in
1 parent 8f00256 commit bef95b3

File tree

4 files changed

+27
-6
lines changed

4 files changed

+27
-6
lines changed

.github/workflows/amdsmi-build.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ jobs:
183183
184184
# Verify wheel installation
185185
echo 'Verifying wheel installation...'
186+
# Diagnostics: list installed .so files without importing (import triggers ctypes.CDLL)
187+
SITE_DIR=$($PYTHON_CMD -c "import site; print(site.getsitepackages()[0])" 2>/dev/null || echo "/usr/lib/python3/dist-packages")
188+
echo "Wheel installed to: $SITE_DIR/amdsmi/"
189+
ls -lh "$SITE_DIR/amdsmi/"*.so 2>/dev/null || echo " No .so files found in wheel install"
190+
186191
cd /tmp && $PYTHON_CMD -c "import amdsmi; print('✓ Import successful'); amdsmi.amdsmi_init(); print('✓ Library loaded'); amdsmi.amdsmi_shut_down(); print('✓ Wheel working!')"
187192
echo 'Python wheel build and install completed on ${{ matrix.os }}'
188193
break

projects/amdsmi/py-interface/amdsmi_wrapper.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,11 +243,15 @@ def _build_candidate_paths():
243243
candidates = []
244244

245245
if context == "pip":
246-
# .so lives alongside the wrapper inside the wheel / site-packages
246+
# .so is self-contained inside the wheel / site-packages.
247+
# Do NOT fall back to the system libamd_smi.so: loading both libraries
248+
# in the same process causes segfaults during static initialisation of
249+
# std::variant tables on older toolchains (GCC 8 / glibc 2.28).
247250
candidates.append(base / "libamd_smi_python.so")
248-
else:
249-
# System package - .so lives under <rocm_root>/lib/
250-
candidates.append(base / "lib" / "libamd_smi.so")
251+
return candidates
252+
253+
# System package - .so lives under <rocm_root>/lib/
254+
candidates.append(base / "lib" / "libamd_smi.so")
251255

252256
# Fallbacks
253257
for env_var in ("ROCM_HOME", "ROCM_PATH"):
@@ -276,7 +280,15 @@ def _load_library():
276280
"""
277281
candidates = _build_candidate_paths()
278282
last_err = None
279-
mode = getattr(ctypes, "RTLD_GLOBAL", 0)
283+
284+
# Use RTLD_LOCAL (default, mode=0) to keep each library's symbols in its
285+
# own scope. RTLD_GLOBAL is unnecessary because ctypes resolves symbols
286+
# via the returned handle (dlsym), and using it leaks template symbols
287+
# into the global namespace. When the pip-context libamd_smi_python.so
288+
# is tried first and fails, those leaked symbols collide with the
289+
# fallback libamd_smi.so's own copies, causing a segfault during the
290+
# latter's static initialisation on older platforms (Debian 10 / glibc 2.28).
291+
mode = 0
280292

281293
for candidate in candidates:
282294
try:

projects/amdsmi/py-interface/setup.cfg.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ include_package_data = True
1818
zip_safe = False
1919

2020
[options.package_data]
21-
* = *.so
21+
* = *.so

projects/amdsmi/src/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,12 @@ if(BUILD_PYTHON_LIB)
156156
rt
157157
Threads::Threads
158158
${CMAKE_DL_LIBS}
159+
amdsminic
159160
${FILESYSTEM_LIB}
160161
)
162+
target_link_directories(${AMD_SMI}_python PRIVATE
163+
${CMAKE_CURRENT_BINARY_DIR}/nic/ai-nic/amdsmi_unified/build/
164+
)
161165
target_include_directories(${AMD_SMI}_python PRIVATE
162166
${CMAKE_CURRENT_SOURCE_DIR}
163167
${PROJECT_SOURCE_DIR}/rocm_smi/include

0 commit comments

Comments
 (0)