Skip to content

Commit c89c1f6

Browse files
committed
support: diagnostic debug for flaky test
This patch adds diagnostic data and additional logging to help find the root cause of issue #1303:

- Exactly when and why cleanup is called
- Whether the cd /tmp command succeeds
- The actual tar exit code (not just 255 from SSH)
- Whether cleanup happens before tar completes (race condition)
- The full collection.log showing the sequence of events

Signed-off-by: Joachim Wiberg <[email protected]>
1 parent dc6e827 commit c89c1f6

File tree

2 files changed

+57
-1
lines changed

2 files changed

+57
-1
lines changed

src/bin/support

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,12 @@ cmd_collect()
129129
# Cleanup on exit
130130
cleanup()
131131
{
132+
echo "[$(date -Iseconds)] Cleanup called (signal: ${1:-EXIT})" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Cleanup called (signal: ${1:-EXIT})" >&2
132133
if [ -d "${COLLECT_DIR}" ]; then
134+
echo "[$(date -Iseconds)] Removing collection directory: ${COLLECT_DIR}" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Removing: ${COLLECT_DIR}" >&2
133135
rm -rf "${COLLECT_DIR}"
136+
else
137+
echo "[$(date -Iseconds)] Collection directory already gone: ${COLLECT_DIR}" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Already gone: ${COLLECT_DIR}" >&2
134138
fi
135139
}
136140
trap cleanup EXIT INT TERM
@@ -383,7 +387,14 @@ cmd_collect()
383387

384388
# Create final tar.gz and output to stdout
385389
# Use -C to change to parent directory so paths in archive don't include full path
386-
cd "${WORK_DIR}"
390+
echo "[$(date -Iseconds)] Changing to work directory: ${WORK_DIR}" >> "${EXEC_LOG}" 2>&1
391+
if ! cd "${WORK_DIR}"; then
392+
echo "[$(date -Iseconds)] ERROR: Failed to cd to ${WORK_DIR}" >> "${EXEC_LOG}" 2>&1
393+
echo "Error: Cannot change to work directory ${WORK_DIR}" >&2
394+
exit 1
395+
fi
396+
echo "[$(date -Iseconds)] Successfully changed to: $(pwd)" >> "${EXEC_LOG}" 2>&1
397+
echo "[$(date -Iseconds)] Creating archive from: $(basename "${COLLECT_DIR}")" >> "${EXEC_LOG}" 2>&1
387398

388399
# Check if password encryption is requested
389400
if [ -n "$PASSWORD" ]; then
@@ -392,14 +403,29 @@ cmd_collect()
392403
exit 1
393404
fi
394405
echo "Encrypting with GPG..." >&2
406+
echo "[$(date -Iseconds)] Starting tar with GPG encryption" >> "${EXEC_LOG}" 2>&1
395407
tar czf - "$(basename "${COLLECT_DIR}")" 2>> "${EXEC_LOG}" | \
396408
gpg --batch --yes --passphrase "$PASSWORD" --pinentry-mode loopback -c 2>> "${EXEC_LOG}"
409+
tar_exit=$?
410+
echo "[$(date -Iseconds)] tar+gpg pipeline exit code: $tar_exit" >> "${EXEC_LOG}" 2>&1
397411
echo "" >&2
398412
echo "WARNING: Remember to share the encryption password out-of-band!" >&2
399413
echo " Do not send it in the same email as the encrypted file." >&2
414+
if [ $tar_exit -ne 0 ]; then
415+
echo "[$(date -Iseconds)] ERROR: tar+gpg failed with exit code $tar_exit" >> "${EXEC_LOG}" 2>&1
416+
exit $tar_exit
417+
fi
400418
else
419+
echo "[$(date -Iseconds)] Starting tar (no encryption)" >> "${EXEC_LOG}" 2>&1
401420
tar czf - "$(basename "${COLLECT_DIR}")" 2>> "${EXEC_LOG}"
421+
tar_exit=$?
422+
echo "[$(date -Iseconds)] tar exit code: $tar_exit" >> "${EXEC_LOG}" 2>&1
423+
if [ $tar_exit -ne 0 ]; then
424+
echo "[$(date -Iseconds)] ERROR: tar failed with exit code $tar_exit" >> "${EXEC_LOG}" 2>&1
425+
exit $tar_exit
426+
fi
402427
fi
428+
echo "[$(date -Iseconds)] Archive creation completed successfully" >> "${EXEC_LOG}" 2>&1
403429
}
404430

405431
cmd_clean()

test/case/misc/support_collect/test.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,21 @@
4848
stderr_output = result.stderr.decode('utf-8') if result.stderr else ""
4949
print(f"support collect failed with return code {result.returncode}")
5050
print(f"stderr: {stderr_output}")
51+
52+
# Try to retrieve the collection.log for debugging
53+
print("\n=== Attempting to retrieve collection.log for debugging ===")
54+
try:
55+
log_result = tgtssh.run("find /tmp -name 'support-*' -type d -exec cat {}/collection.log \\; 2>/dev/null || echo 'No collection.log found'",
56+
stdout=subprocess.PIPE,
57+
stderr=subprocess.PIPE,
58+
timeout=10,
59+
check=False)
60+
if log_result.stdout:
61+
log_output = log_result.stdout.decode('utf-8')
62+
print(f"collection.log contents:\n{log_output}")
63+
except Exception as e:
64+
print(f"Could not retrieve collection.log: {e}")
65+
5166
raise Exception("support collect command failed")
5267

5368
with test.step("Verify tarball was created and is valid"):
@@ -110,6 +125,21 @@
110125
if result.returncode != 0:
111126
stderr_output = result.stderr.decode('utf-8') if result.stderr else ""
112127
print(f"support collect with encryption failed: {stderr_output}")
128+
129+
# Try to retrieve the collection.log for debugging
130+
print("\n=== Attempting to retrieve collection.log for debugging ===")
131+
try:
132+
log_result = tgtssh.run("find /tmp -name 'support-*' -type d -exec cat {}/collection.log \\; 2>/dev/null || echo 'No collection.log found'",
133+
stdout=subprocess.PIPE,
134+
stderr=subprocess.PIPE,
135+
timeout=10,
136+
check=False)
137+
if log_result.stdout:
138+
log_output = log_result.stdout.decode('utf-8')
139+
print(f"collection.log contents:\n{log_output}")
140+
except Exception as e:
141+
print(f"Could not retrieve collection.log: {e}")
142+
113143
raise Exception("support collect with --password failed")
114144

115145
with test.step("Verify encrypted file and decrypt it"):

0 commit comments

Comments
 (0)