Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions ci_utils/common/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,78 @@ def read_json_file(json_path: str) -> Dict[str, Any]:
logger.error("Failed to parse JSON file %s: %s", json_path, e)
raise

# -----------------------
# Check for process crash and dump backtraces from core files
# -----------------------
def check_process_crash_and_backtrace(
    session,
    process_name="ganesha",
    cores_dir="/tmp/cores",
    binary_path="/usr/bin/ganesha.nfsd",
    gdb_cmd=None,
):
    """
    Check whether a process is running and, if it is not, collect gdb
    backtraces for every file found in the core-dump directory.

    The check is done with ``pgrep``. When the process is missing, the
    contents of ``cores_dir`` are listed, gdb is installed on a best-effort
    basis, and gdb is run once per listed file.

    Args:
        session: Active RemoteSession instance (same as run_cmd).
        process_name (str): Process name to check via pgrep (e.g. "ganesha").
        cores_dir (str): Directory where core dumps are stored (e.g. /tmp/cores).
        binary_path (str): Full path to the binary for gdb (e.g. /usr/bin/ganesha.nfsd).
        gdb_cmd (str, optional): Custom gdb command format string. Must use
            {binary_path} and {core_path} placeholders. The values are
            substituted verbatim, so a custom template is responsible for its
            own shell quoting. If None, uses the default: gdb -q -batch with
            debuginfod, pagination off, and "thread apply all bt full", with
            both paths shell-quoted.

    Returns:
        str: Combined gdb output for all listed files, one section per file,
        or None if the process was running, no files were found, or an
        unexpected error occurred (the error is logged). Caller can write
        this to a file.
    """
    # Local import so this helper does not rely on the module's import block.
    import shlex

    default_gdb_cmd = (
        "gdb -q -batch "
        "-ex \"set debuginfod enabled on\" "
        "-ex \"set pagination off\" "
        "-ex \"thread apply all bt full\" "
        "{binary_path} {core_path}"
    )

    try:
        logger.info("Check if %s is running", process_name)
        out, code = run_cmd(
            session, f"pgrep {shlex.quote(str(process_name))}", check=False
        )
        logger.debug("Output: %s, Code: %s", out, code)
        if code == 0:
            # Process is alive: nothing to collect.
            return None

        logger.error("%s is not running", process_name)
        logger.info("Check for crash in %s", cores_dir)

        # Quote the directory so paths with spaces or shell metacharacters
        # are passed to ls as a single argument.
        quoted_dir = shlex.quote(str(cores_dir))

        # Long listing is only for the debug log; the plain listing below is
        # what gets parsed into file names.
        out, code = run_cmd(session, f"ls -la {quoted_dir}", check=False)
        logger.debug("Output: %s, Code: %s", out, code)
        list_out, _ = run_cmd(session, f"ls {quoted_dir} 2>/dev/null", check=False)
        core_files = [
            line.strip()
            for line in (list_out or "").splitlines()
            if line.strip()
        ]
        if not core_files:
            logger.info("No crashes found")
            return None

        logger.info("Crashes found")
        logger.info("Install debug packages and see for crashes")
        # Best-effort: a failed install must not discard the crash report.
        # With the default check, run_cmd would raise here and the outer
        # handler would return None, hiding the fact that core files exist.
        _, install_code = run_cmd(session, "dnf install -y gdb", check=False)
        if install_code != 0:
            logger.warning(
                "gdb installation returned code %s; attempting backtraces anyway",
                install_code,
            )

        use_default = gdb_cmd is None
        template = default_gdb_cmd if use_default else gdb_cmd

        backtraces = []
        for core_name in core_files:
            core_path = f"{cores_dir}/{core_name}"
            if use_default:
                # The default template leaves the placeholders bare, so the
                # substituted paths must be shell-quoted here.
                cmd = template.format(
                    binary_path=shlex.quote(str(binary_path)),
                    core_path=shlex.quote(core_path),
                )
            else:
                # Custom templates keep receiving raw values so that any
                # quoting they already contain keeps working.
                cmd = template.format(
                    binary_path=binary_path,
                    core_path=core_path,
                )
            bt_out, _ = run_cmd(session, cmd, check=False)
            backtraces.append(
                f"--- Backtrace for {core_path} ---\n{bt_out or ''}"
            )
        return "\n\n".join(backtraces)
    except Exception as e:
        # Crash collection is diagnostic only; never let it fail the caller.
        logger.error("Error checking for crashes: %s", e)
        return None


# -----------------------
# Run remote commands
# -----------------------
Expand Down
27 changes: 25 additions & 2 deletions ci_utils/nfs_ganesha/gpfs_ganesha_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def install_ganesha(self, test_workspace: str):
else:
self._build_from_source(test_workspace)

self.coredump_setup()
self.start_ganesha_service()

# -------------------------------
Expand Down Expand Up @@ -97,7 +98,20 @@ def _build_from_source(self, test_workspace: str):
repo_name = f"codeready-builder-for-rhel-{rhel_major}-{arch.strip()}-rpms"
run_cmd(self.session, f"subscription-manager repos --enable={repo_name}")

run_cmd(self.session, f"dnf install --enablerepo={repo_name} -y {BASE_PACKAGES} {BUILDREQUIRES_EXTRA} {ADDITIONAL_PACKAGES} libacl-devel libblkid-devel libcap-devel redhat-rpm-config rpm-build libgfapi-devel xfsprogs-devel selinux-policy-devel sqlite --skip-broken")
dnf_cmd = f"dnf install --enablerepo={repo_name} -y {BASE_PACKAGES} {BUILDREQUIRES_EXTRA} {ADDITIONAL_PACKAGES} libacl-devel libblkid-devel libcap-devel redhat-rpm-config rpm-build libgfapi-devel xfsprogs-devel selinux-policy-devel sqlite --skip-broken"
max_attempts = 3
retry_delay = 30
for attempt in range(1, max_attempts + 1):
try:
run_cmd(self.session, dnf_cmd)
break
except RuntimeError as e:
if attempt == max_attempts:
raise
logger.warning("dnf install failed (attempt %d/%d): %s. Cleaning cache and retrying in %ds ...", attempt, max_attempts, e, retry_delay)
run_cmd(self.session, "dnf clean all", check=False)
time.sleep(retry_delay)

cmake_binary, _ = run_cmd(self.session, "which cmake")
build_dir = f"{test_workspace}/nfs-ganesha/build"
src_dir = f"{test_workspace}/nfs-ganesha"
Expand Down Expand Up @@ -189,7 +203,16 @@ def _build_from_source(self, test_workspace: str):
run_cmd(self.session, "systemctl daemon-reload")

logger.info("NFS-Ganesha build, install, and minimal config complete.")

# -------------------------------
# Setup coredump configuration
# -------------------------------
def coredump_setup(self):
    """
    Configure core dumps on the remote host.

    Sets kernel.core_pattern via sysctl so core files are written under
    /tmp/cores, creates that directory, and reads the pattern back from
    /proc. The commands run in that order through run_cmd on self.session.
    """
    logger.info("[STEP]: Setting up coredump configuration")
    setup_commands = (
        # Route core files into /tmp/cores using the core.%e.%p.%h.%t template.
        "sysctl -w kernel.core_pattern=/tmp/cores/core.%e.%p.%h.%t",
        # Ensure the target directory exists.
        "mkdir -p /tmp/cores",
        # Read the active pattern back.
        "cat /proc/sys/kernel/core_pattern",
    )
    for command in setup_commands:
        run_cmd(self.session, command)
    logger.info("Coredump setup complete.")

# -------------------------------
# Start Ganesha service
# -------------------------------
Expand Down
14 changes: 12 additions & 2 deletions ci_utils/nfs_ganesha/vfs_nfs_ganesha_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def install_ganesha(self, test_workspace: str):
else:
self._build_from_source(test_workspace)

self.coredump_setup()
self.start_ganesha_service()

# -------------------------------
Expand Down Expand Up @@ -103,7 +104,17 @@ def _build_from_source(self, test_workspace: str):
run_cmd(self.session, "cat /etc/ganesha/ganesha.conf")

logger.info("NFS-Ganesha build, install, and minimal config complete.")


# -------------------------------
# Setup coredump configuration
# -------------------------------
def coredump_setup(self):
    """
    Configure core dumps on the remote host.

    Writes the core file pattern straight into
    /proc/sys/kernel/core_pattern so core files are written under
    /tmp/cores, then creates that directory. Both commands run through
    run_cmd on self.session.
    """
    logger.info("[STEP]: Setting up coredump configuration")
    setup_commands = (
        # Route core files into /tmp/cores using the core.%e.%p.%h.%t template.
        "echo '/tmp/cores/core.%e.%p.%h.%t' > /proc/sys/kernel/core_pattern",
        # Ensure the target directory exists.
        "mkdir -p /tmp/cores",
    )
    for command in setup_commands:
        run_cmd(self.session, command)
    logger.info("Coredump setup complete.")


# -------------------------------
# Start Ganesha service
# -------------------------------
Expand All @@ -116,4 +127,3 @@ def start_ganesha_service(self):
run_cmd(self.session, "journalctl -xe", check=False)
assert False, "Failed to start nfs-ganesha service"
logger.info("NFS-Ganesha started successfully.")

2 changes: 2 additions & 0 deletions ci_utils/pynfs/pynfs_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ def collect_failures(self, outputs: List[Tuple[str, str, int]]) -> Tuple[bool, s
failure_summary.append("") # blank line
if code != 0:
return_code = code
fail_found = True
logger.error("Return code %s detected in pynfs %s", code, version)

if failure_summary:
summary_text = "\n".join(failure_summary)
Expand Down
Loading