Skip to content

Commit ff5b6a6

Browse files
Merge pull request #134 from Manimaran-MM/test_06022026
(enhancement): gatecheck - Coredump/Crash related enhancements
2 parents ad7888c + 0cebfe3 commit ff5b6a6

File tree

5 files changed

+332
-27
lines changed

5 files changed

+332
-27
lines changed

ci_utils/common/helpers.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,78 @@ def read_json_file(json_path: str) -> Dict[str, Any]:
113113
logger.error("Failed to parse JSON file %s: %s", json_path, e)
114114
raise
115115

116+
# -----------------------
117+
# Check for process crash and dump backtraces from core files
118+
# -----------------------
119+
def check_process_crash_and_backtrace(
    session,
    process_name="ganesha",
    cores_dir="/tmp/cores",
    binary_path="/usr/bin/ganesha.nfsd",
    gdb_cmd=None,
):
    """
    Check whether a process is running; if it is not, collect gdb backtraces
    for every file found in the core-dump directory.

    The check is best-effort: any unexpected error is logged (with traceback)
    and reported as ``None`` rather than raised, so a diagnostics failure never
    aborts the calling test flow.

    Args:
        session: Active RemoteSession instance (same as run_cmd).
        process_name (str): Process name to check via pgrep (e.g. "ganesha").
        cores_dir (str): Directory where core dumps are stored (e.g. /tmp/cores).
        binary_path (str): Full path to the binary for gdb (e.g. /usr/bin/ganesha.nfsd).
        gdb_cmd (str, optional): Custom gdb command format string. Must use
            {binary_path} and {core_path} placeholders. The placeholders are
            filled with shell-quoted values, so the template should not wrap
            them in its own quotes. If None, uses the default:
            gdb -q -batch with debuginfod, pagination off, and "thread apply all bt full".

    Returns:
        str: Combined gdb output for all core files (one
        "--- Backtrace for <path> ---" section per file), or None if the
        process was running, no core files were found, or an unexpected
        error occurred. Caller can write this to a file.
    """
    # Function-scope import keeps this block self-contained; shlex is stdlib.
    import shlex

    default_gdb_cmd = (
        "gdb -q -batch "
        "-ex \"set debuginfod enabled on\" "
        "-ex \"set pagination off\" "
        "-ex \"thread apply all bt full\" "
        "{binary_path} {core_path}"
    )

    try:
        logger.info("Check if %s is running", process_name)
        out, code = run_cmd(
            session, f"pgrep {shlex.quote(process_name)}", check=False
        )
        logger.debug("Output: %s, Code: %s", out, code)
        if code == 0:
            # pgrep found at least one matching process: nothing to collect.
            return None

        logger.error("%s is not running", process_name)
        logger.info("Check for crash in %s", cores_dir)
        quoted_dir = shlex.quote(cores_dir)

        # Long listing is for the debug log only (sizes / timestamps).
        out, code = run_cmd(session, f"ls -la {quoted_dir}", check=False)
        logger.debug("Output: %s, Code: %s", out, code)

        # -1 forces one entry per line even if the session allocates a TTY,
        # where plain `ls` would pack several names into columns.
        list_out, _ = run_cmd(
            session, f"ls -1 {quoted_dir} 2>/dev/null", check=False
        )
        core_files = [
            line.strip()
            for line in (list_out or "").splitlines()
            if line.strip()
        ]
        if not core_files:
            logger.info("No crashes found")
            return None

        logger.info("Crashes found")

        # Install gdb only when it is missing, and never let an install
        # failure hide the fact that core files exist.
        _, gdb_lookup_code = run_cmd(session, "command -v gdb", check=False)
        if gdb_lookup_code != 0:
            logger.info("Install debug packages and see for crashes")
            _, install_code = run_cmd(
                session, "dnf install -y gdb", check=False
            )
            if install_code != 0:
                logger.error(
                    "Failed to install gdb (exit code %s); "
                    "backtrace output may be incomplete",
                    install_code,
                )

        template = default_gdb_cmd if gdb_cmd is None else gdb_cmd
        quoted_binary = shlex.quote(binary_path)
        backtraces = []
        for core_name in core_files:
            core_path = f"{cores_dir}/{core_name}"
            cmd = template.format(
                binary_path=quoted_binary,
                core_path=shlex.quote(core_path),
            )
            bt_out, _ = run_cmd(session, cmd, check=False)
            backtraces.append(
                f"--- Backtrace for {core_path} ---\n{bt_out or ''}"
            )
        return "\n\n".join(backtraces)
    except Exception as e:
        # Deliberate best-effort boundary: keep the traceback in the log but
        # do not propagate, matching the documented None-on-failure contract.
        logger.exception("Error checking for crashes: %s", e)
        return None
186+
187+
116188
# -----------------------
117189
# Run remote commands
118190
# -----------------------

ci_utils/nfs_ganesha/gpfs_ganesha_setup.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def install_ganesha(self, test_workspace: str):
5858
else:
5959
self._build_from_source(test_workspace)
6060

61+
self.coredump_setup()
6162
self.start_ganesha_service()
6263

6364
# -------------------------------
@@ -97,7 +98,20 @@ def _build_from_source(self, test_workspace: str):
9798
repo_name = f"codeready-builder-for-rhel-{rhel_major}-{arch.strip()}-rpms"
9899
run_cmd(self.session, f"subscription-manager repos --enable={repo_name}")
99100

100-
run_cmd(self.session, f"dnf install --enablerepo={repo_name} -y {BASE_PACKAGES} {BUILDREQUIRES_EXTRA} {ADDITIONAL_PACKAGES} libacl-devel libblkid-devel libcap-devel redhat-rpm-config rpm-build libgfapi-devel xfsprogs-devel selinux-policy-devel sqlite --skip-broken")
101+
dnf_cmd = f"dnf install --enablerepo={repo_name} -y {BASE_PACKAGES} {BUILDREQUIRES_EXTRA} {ADDITIONAL_PACKAGES} libacl-devel libblkid-devel libcap-devel redhat-rpm-config rpm-build libgfapi-devel xfsprogs-devel selinux-policy-devel sqlite --skip-broken"
102+
max_attempts = 3
103+
retry_delay = 30
104+
for attempt in range(1, max_attempts + 1):
105+
try:
106+
run_cmd(self.session, dnf_cmd)
107+
break
108+
except RuntimeError as e:
109+
if attempt == max_attempts:
110+
raise
111+
logger.warning("dnf install failed (attempt %d/%d): %s. Cleaning cache and retrying in %ds ...", attempt, max_attempts, e, retry_delay)
112+
run_cmd(self.session, "dnf clean all", check=False)
113+
time.sleep(retry_delay)
114+
101115
cmake_binary, _ = run_cmd(self.session, "which cmake")
102116
build_dir = f"{test_workspace}/nfs-ganesha/build"
103117
src_dir = f"{test_workspace}/nfs-ganesha"
@@ -189,7 +203,16 @@ def _build_from_source(self, test_workspace: str):
189203
run_cmd(self.session, "systemctl daemon-reload")
190204

191205
logger.info("NFS-Ganesha build, install, and minimal config complete.")
192-
206+
# -------------------------------
207+
# Setup coredump configuration
208+
# -------------------------------
209+
def coredump_setup(self):
    """Configure the remote host to write core dumps under /tmp/cores.

    Runs, in order, on ``self.session``:
      1. ``sysctl -w`` to set ``kernel.core_pattern`` to
         ``/tmp/cores/core.%e.%p.%h.%t``,
      2. ``mkdir -p`` to make sure the target directory exists,
      3. ``cat`` of ``/proc/sys/kernel/core_pattern`` so the effective
         pattern is passed through ``run_cmd``.
    """
    logger.info("[STEP]: Setting up coredump configuration")
    setup_commands = (
        "sysctl -w kernel.core_pattern=/tmp/cores/core.%e.%p.%h.%t",
        "mkdir -p /tmp/cores",
        "cat /proc/sys/kernel/core_pattern",
    )
    for remote_cmd in setup_commands:
        run_cmd(self.session, remote_cmd)
    logger.info("Coredump setup complete.")
215+
193216
# -------------------------------
194217
# Start Ganesha service
195218
# -------------------------------

ci_utils/nfs_ganesha/vfs_nfs_ganesha_setup.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def install_ganesha(self, test_workspace: str):
2626
else:
2727
self._build_from_source(test_workspace)
2828

29+
self.coredump_setup()
2930
self.start_ganesha_service()
3031

3132
# -------------------------------
@@ -103,7 +104,17 @@ def _build_from_source(self, test_workspace: str):
103104
run_cmd(self.session, "cat /etc/ganesha/ganesha.conf")
104105

105106
logger.info("NFS-Ganesha build, install, and minimal config complete.")
106-
107+
108+
# -------------------------------
109+
# Setup coredump configuration
110+
# -------------------------------
111+
def coredump_setup(self):
    """Configure the remote host to write core dumps under /tmp/cores.

    Creates the dump directory first, so the kernel pattern never points at
    a missing path. The pattern is then set with ``sysctl -w``, the same
    mechanism the GPFS setup class uses, rather than a shell redirect into
    /proc. Finally the effective pattern is read back through ``run_cmd``
    so it can be checked after the fact.
    """
    logger.info("[STEP]: Setting up coredump configuration")
    run_cmd(self.session, "mkdir -p /tmp/cores")
    run_cmd(self.session, "sysctl -w kernel.core_pattern=/tmp/cores/core.%e.%p.%h.%t")
    # Read the value back to confirm the write took effect.
    run_cmd(self.session, "cat /proc/sys/kernel/core_pattern")
    logger.info("Coredump setup complete.")
116+
117+
107118
# -------------------------------
108119
# Start Ganesha service
109120
# -------------------------------
@@ -116,4 +127,3 @@ def start_ganesha_service(self):
116127
run_cmd(self.session, "journalctl -xe", check=False)
117128
assert False, "Failed to start nfs-ganesha service"
118129
logger.info("NFS-Ganesha started successfully.")
119-

ci_utils/pynfs/pynfs_setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ def collect_failures(self, outputs: List[Tuple[str, str, int]]) -> Tuple[bool, s
236236
failure_summary.append("") # blank line
237237
if code != 0:
238238
return_code = code
239+
fail_found = True
240+
logger.error("Return code %s detected in pynfs %s", code, version)
239241

240242
if failure_summary:
241243
summary_text = "\n".join(failure_summary)

0 commit comments

Comments
 (0)