Skip to content

Commit 1b6ae89

Browse files
committed
Addressed several minor issues identified by copilot review
1 parent 2d7817f commit 1b6ae89

File tree

7 files changed

+57
-26
lines changed

7 files changed

+57
-26
lines changed

e4s_cl/cf/trace.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,13 @@ def list_syscalls():
4949
event = debugger.waitSyscall()
5050
except ProcessExit as event:
5151
return_code = event.exitcode
52-
# When a process exits (especially in MPI with multiple processes),
53-
# the debugger may still have other processes. Check if we should continue.
54-
if len(debugger.dict) > 0:
55-
continue
56-
else:
57-
break
52+
# When a process exits in a multi-process MPI run, we need to check
53+
# if there are other active processes remaining. The exiting process
54+
# may still be in debugger.dict at this point, so check if there are
55+
# processes other than the one that just exited.
56+
# Note: while debugger checks len(debugger.list), which tracks active processes.
57+
# If the loop continues (debugger is still truthy), we can continue tracing.
58+
continue
5859
except ProcessSignal as event:
5960
event.process.syscall(event.signum)
6061
continue

e4s_cl/cf/wi4mpi/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,11 @@ def wi4mpi_preload(install_dir: Path, container_library_dir: str = None) -> List
286286
NOTE: The fake libraries (fakelib{FROM}/) go in LD_LIBRARY_PATH, NOT in LD_PRELOAD.
287287
This matches the official Wi4MPI launcher script behavior.
288288
289+
IMPORTANT: When container_library_dir is provided, this function assumes the target
290+
MPI libraries (WI4MPI_RUN_MPI_C_LIB and WI4MPI_RUN_MPI_F_LIB) have already been
291+
imported into the container at that location. The caller must ensure import_library()
292+
is called before this function when running in container mode.
293+
289294
Args:
290295
install_dir: Path to Wi4MPI installation directory
291296
container_library_dir: The path where host libraries are mounted inside

e4s_cl/cf/wi4mpi/install.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,10 +191,13 @@ def _check_wi4mpi_install(install_dir: Path) -> bool:
191191
try:
192192
with open(lib, 'rb') as f:
193193
elf = ELFFile(f)
194-
if elf.header.e_machine != expected:
194+
# Note: With pyelftools 0.27, header['e_machine'] returns a string like 'EM_X86_64'
195+
# Both elf.header.e_machine and elf.header['e_machine'] work equivalently
196+
machine_type = elf.header['e_machine']
197+
if machine_type != expected:
195198
LOGGER.error(
196199
"Wi4MPI library %s architecture mismatch: expected %s, got %s",
197-
lib.name, expected, elf.header.e_machine)
200+
lib.name, expected, machine_type)
198201
return False
199202
except Exception as err:
200203
LOGGER.debug("Failed to check architecture of %s: %s", lib,

e4s_cl/cli/commands/__execute.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ def main(self, argv):
464464
for path in files:
465465
source, dest = path, None
466466
if ':' in path:
467-
parts = path.split(':')
467+
# Split on rightmost colon to handle Unix paths that may contain colons
468+
parts = path.rsplit(':', 1)
468469
if len(parts) == 2:
469470
source, dest = parts
470471
else:
@@ -523,9 +524,8 @@ def _path(library: Library):
523524
# If WI4MPI is found in the environment, import its files and
524525
# preload its required components
525526
wi4mpi_import(container, wi4mpi_install_dir)
526-
params.preload += wi4mpi_preload(wi4mpi_install_dir, container.import_library_dir)
527527

528-
# Override Wi4MPI environment variables to use container paths
528+
# Import target MPI libraries BEFORE constructing preload list
529529
# Wi4MPI uses WI4MPI_RUN_MPI_C_LIB to dlopen the target MPI library
530530
# The host path won't exist inside the container; we need to use
531531
# the container path where host libraries are bound
@@ -544,6 +544,10 @@ def _path(library: Library):
544544
if host_mpi_lib:
545545
params.extra_env["WI4MPI_RUN_MPIIO_C_LIB"] = container_mpi_lib.as_posix()
546546
params.extra_env["WI4MPI_RUN_MPIIO_F_LIB"] = container_mpi_f_lib.as_posix()
547+
548+
# Now construct preload list AFTER libraries are imported
549+
# wi4mpi_preload will use container paths that we just ensured exist
550+
params.preload += wi4mpi_preload(wi4mpi_install_dir, container.import_library_dir)
547551

548552
# Write the entry script to a file, then bind it to the container
549553
script_name = params.setup()

e4s_cl/cli/commands/profile/detect.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,20 @@ def _filter_mpi_artifacts(libs: List[Path], files: List[Path],
286286
launcher = resolved_launcher.resolve()
287287

288288
def _get_lib_prefix(path: Path) -> Path:
289+
"""Extract installation prefix from library path.
290+
291+
Looks for 'lib' or 'lib64' directory markers to determine the prefix.
292+
For example, /opt/mpich/lib/libmpi.so -> /opt/mpich
293+
Falls back to parent directory for non-standard layouts.
294+
"""
289295
for marker in ['lib', 'lib64']:
290296
if marker in path.parts:
291-
return Path(*path.parts[:path.parts.index(marker)])
297+
marker_idx = path.parts.index(marker)
298+
# Ensure marker is a directory component, not the filename
299+
if marker_idx < len(path.parts) - 1:
300+
return Path(*path.parts[:marker_idx])
301+
# Fallback: use parent directory for non-standard layouts
302+
# This handles cases like /custom/path/libmpi.so
292303
return path.parent
293304

294305
def _is_core_mpi(path: Path) -> bool:

e4s_cl/util.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,15 @@ def run_e4scl_subprocess(cmd, cwd=None, env=None, capture_output=False, timeout=
230230
except subprocess.TimeoutExpired:
231231
LOGGER.warning("Subprocess timed out after %s seconds: %s", timeout, cmd)
232232
proc.kill()
233-
proc.wait()
233+
# Drain remaining output to avoid deadlock when process has buffered data
234+
# communicate() will collect any remaining output and wait for termination
235+
try:
236+
output, _ = proc.communicate(timeout=5) # Give 5s for cleanup
237+
except subprocess.TimeoutExpired:
238+
# Process still hasn't terminated after kill+communicate, force wait
239+
proc.wait()
240+
output = ""
234241
returncode = -1
235-
output = ""
236242

237243
if capture_output:
238244
return returncode, output

scripts/demo.sh

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ if [[ -n "${E4S_CL_LAUNCHER_ARGS}" ]]; then
475475
LAUNCHER_ARGS+=("${EXTRA_LAUNCHER_ARGS[@]}")
476476
fi
477477

478-
HOST_MPI_VERSION="$(${HOST_MPIRUN} --version 2>/dev/null | head -n 2 || true)"
478+
HOST_MPI_VERSION="$("${HOST_MPIRUN}" --version 2>/dev/null | head -n 2 || true)"
479479
HOST_MPI_FAMILY="$(detect_mpi_family "${HOST_MPI_VERSION}")"
480480
log "Detected host MPI family: ${HOST_MPI_FAMILY:-unknown}"
481481

@@ -545,10 +545,10 @@ OSU_CHECKSUM_FILE="${E4S_CL_CACHE_DIR}/osu.sha256"
545545
NEED_DOWNLOAD="0"
546546
if [[ ! -f "${OSU_TARBALL}" || ! -f "${OSU_META_FILE}" ]]; then
547547
NEED_DOWNLOAD="1"
548-
rm -rf "${OSU_SRC_DIR}"
548+
[[ -n "${OSU_SRC_DIR}" ]] && rm -rf "${OSU_SRC_DIR}"
549549
elif ! grep -Fq "osu_url=${E4S_CL_OSU_URL}" "${OSU_META_FILE}"; then
550550
NEED_DOWNLOAD="1"
551-
rm -rf "${OSU_SRC_DIR}"
551+
[[ -n "${OSU_SRC_DIR}" ]] && rm -rf "${OSU_SRC_DIR}"
552552
fi
553553

554554
if [[ "${NEED_DOWNLOAD}" == "1" ]]; then
@@ -626,7 +626,7 @@ fi
626626

627627
if [[ "${REBUILD_HOST_OSU}" == "1" ]]; then
628628
log "(Re)building host OSU benchmarks"
629-
rm -rf "${OSU_HOST_PREFIX}"
629+
[[ -n "${OSU_HOST_PREFIX}" ]] && rm -rf "${OSU_HOST_PREFIX}"
630630
mkdir -p "${OSU_HOST_PREFIX}"
631631
(
632632
# Clear CFLAGS/CXXFLAGS to avoid injecting incompatible compiler flags
@@ -707,7 +707,8 @@ EOF
707707
if [[ -z "${E4S_CL_APPTAINER_BUILD_ARGS}" ]]; then
708708
E4S_CL_APPTAINER_BUILD_ARGS="--fakeroot"
709709
fi
710-
run_silent ${CONTAINER_CMD} build ${E4S_CL_APPTAINER_BUILD_ARGS} "${E4S_CL_IMAGE}" "${E4S_CL_IMAGE_DEF}"
710+
# Note: E4S_CL_APPTAINER_BUILD_ARGS intentionally unquoted to allow word-splitting for multiple flags
711+
run_silent "${CONTAINER_CMD}" build ${E4S_CL_APPTAINER_BUILD_ARGS} "${E4S_CL_IMAGE}" "${E4S_CL_IMAGE_DEF}"
711712
fi
712713
fi
713714

@@ -716,9 +717,9 @@ if [[ "${E4S_CL_IMAGE}" != docker://* && "${E4S_CL_IMAGE}" != library://* ]]; th
716717
[[ -f "${E4S_CL_IMAGE}" ]] || fail "E4S_CL_IMAGE not found: ${E4S_CL_IMAGE}"
717718
fi
718719

719-
CONTAINER_MPI_VERSION="$(${CONTAINER_CMD} exec "${E4S_CL_IMAGE}" mpirun --version 2>/dev/null | head -n 2 || true)"
720+
CONTAINER_MPI_VERSION="$("${CONTAINER_CMD}" exec "${E4S_CL_IMAGE}" mpirun --version 2>/dev/null | head -n 2 || true)"
720721
if [[ -z "$(detect_mpi_family "${CONTAINER_MPI_VERSION}")" ]]; then
721-
CONTAINER_MPI_VERSION+=$'\n'"$(${CONTAINER_CMD} exec "${E4S_CL_IMAGE}" mpichversion 2>/dev/null | head -n 2 || true)"
722+
CONTAINER_MPI_VERSION+=$'\n'"$("${CONTAINER_CMD}" exec "${E4S_CL_IMAGE}" mpichversion 2>/dev/null | head -n 2 || true)"
722723
fi
723724
CONTAINER_MPI_FAMILY="$(detect_mpi_family "${CONTAINER_MPI_VERSION}")"
724725

@@ -774,7 +775,7 @@ run "${E4S_CL_BIN}" profile select "${E4S_CL_PROFILE_NAME}"
774775
# This mirrors how a user would re-run detection when the profile isn't populated.
775776
profile_has_bindings() {
776777
local output
777-
output="$(${E4S_CL_BIN} profile show)"
778+
output="$("${E4S_CL_BIN}" profile show)"
778779
if echo "${output}" | awk '/^Bound libraries:/{inlib=1; next} /^Bound files:/{inlib=0} inlib && /^ - /{print}' | grep -q .; then
779780
return 0
780781
fi
@@ -821,7 +822,7 @@ fi
821822

822823
if [[ "${REBUILD_CONT_OSU}" == "1" ]]; then
823824
log "(Re)building container OSU benchmarks"
824-
rm -rf "${OSU_CONT_PREFIX}"
825+
[[ -n "${OSU_CONT_PREFIX}" ]] && rm -rf "${OSU_CONT_PREFIX}"
825826
mkdir -p "${OSU_CONT_PREFIX}"
826827
run_silent "${CONTAINER_CMD}" exec -B "${E4S_CL_WORKDIR}:/work" -B "${E4S_CL_CACHE_DIR}:/cache" "${E4S_CL_IMAGE}" bash -lc "\
827828
set -euo pipefail; \
@@ -861,9 +862,9 @@ if [[ "${E4S_CL_RUN_CONTAINER_BASELINE}" == "1" ]]; then
861862
for bench in "${OSU_BENCHES[@]}"; do
862863
bench_name="$(basename "${bench}")"
863864
out_file="${E4S_CL_WORKDIR}/container_${bench_name}.txt"
864-
run_timed "container_${bench_name}" ${CONTAINER_CMD} exec -B "${E4S_CL_WORKDIR}:/work" "${E4S_CL_IMAGE}" bash -lc "\
865-
mpirun -np ${E4S_CL_MPI_PROCS} /work/osu-container/libexec/osu-micro-benchmarks/mpi/${bench} ${OSU_ARGS[*]} \
866-
" | tee "${out_file}"
865+
# Build command array to avoid injection via bash -lc string interpolation
866+
mpi_cmd=(mpirun -np "${E4S_CL_MPI_PROCS}" "/work/osu-container/libexec/osu-micro-benchmarks/mpi/${bench}" "${OSU_ARGS[@]}")
867+
run_timed "container_${bench_name}" "${CONTAINER_CMD}" exec -B "${E4S_CL_WORKDIR}:/work" "${E4S_CL_IMAGE}" bash -lc "$(printf '%q ' "${mpi_cmd[@]}")" | tee "${out_file}"
867868
done
868869
fi
869870

0 commit comments

Comments
 (0)