From c539d621ebd1dccc1379acfe7bc7a2cafba64363 Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Thu, 26 Mar 2026 19:09:56 +0800 Subject: [PATCH 1/3] Extract stdout/stderr/cmd/rc from failed tasks in step1 parser The job parser previously only captured res.msg from failed task events, which is often just a generic message like "non-zero return code". Now extracts res.stdout, res.stderr, res.cmd, and res.rc when present, with a fallback to event-level stdout when res fields are missing/suppressed. Closes redhat-et/rhdp-rca-plugin#15 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../schemas/job_context.schema.json | 6 +- .../root-cause-analysis/scripts/job_parser.py | 37 ++++++--- .../tests/data/sample_job.json | 6 +- .../tests/scripts/test_job_parser.py | 76 +++++++++++++++++++ 4 files changed, 111 insertions(+), 14 deletions(-) diff --git a/skills/root-cause-analysis/schemas/job_context.schema.json b/skills/root-cause-analysis/schemas/job_context.schema.json index 2b4a8b8..187fc9e 100644 --- a/skills/root-cause-analysis/schemas/job_context.schema.json +++ b/skills/root-cause-analysis/schemas/job_context.schema.json @@ -66,7 +66,11 @@ "error_message": {"type": "string"}, "task_path": {"type": "string"}, "task_action": {"type": "string"}, - "duration": {"type": "number"} + "duration": {"type": "number"}, + "stdout": {"type": ["string", "array"], "description": "stdout from the failed task result or event"}, + "stderr": {"type": "string", "description": "stderr from the failed task result"}, + "cmd": {"type": ["string", "array"], "description": "Command that was executed"}, + "rc": {"type": "integer", "description": "Return code of the failed command"} } } }, diff --git a/skills/root-cause-analysis/scripts/job_parser.py b/skills/root-cause-analysis/scripts/job_parser.py index be4ccdf..1b39920 100644 --- a/skills/root-cause-analysis/scripts/job_parser.py +++ b/skills/root-cause-analysis/scripts/job_parser.py @@ -165,18 +165,31 @@ def _extract_failed_tasks(events: list[dict]) -> list[dict[str, Any]]: event_data = event.get("event_data", {}) res = event_data.get("res", {}) - failed.append( - { - "task": event.get("task", ""), - "play": event.get("play", ""), - "role": event.get("role", ""), - "timestamp": event.get("created", ""), - "error_message": res.get("msg", "") if isinstance(res, dict) else str(res), - "task_path": event_data.get("task_path", ""), - "task_action": event_data.get("task_action", ""), - "duration": event_data.get("duration", 0), - } - ) + task_info = { + "task": event.get("task", ""), + "play": event.get("play", ""), + "role": event.get("role", ""), + "timestamp": event.get("created", ""), + "error_message": res.get("msg", "") if isinstance(res, dict) else str(res), + "task_path": event_data.get("task_path", ""), + "task_action": event_data.get("task_action", ""), + "duration": event_data.get("duration", 0), + } + + # Extract diagnostic details from res when available + if isinstance(res, dict): + for field in ("stdout", "stderr", "cmd", "rc"): + value = res.get(field) + if value is not None and value != "": + task_info[field] = value + + # Fallback: capture event-level stdout when res fields are missing + if "stdout" not in task_info: + event_stdout = event.get("stdout", "") + if event_stdout: + task_info["stdout"] = event_stdout + + failed.append(task_info) return failed diff --git a/skills/root-cause-analysis/tests/data/sample_job.json b/skills/root-cause-analysis/tests/data/sample_job.json index ff7c35c..72f28be 100644 --- a/skills/root-cause-analysis/tests/data/sample_job.json +++ b/skills/root-cause-analysis/tests/data/sample_job.json @@ -35,7 +35,11 @@ "created": "2023-10-27T10:04:00Z", "event_data": { "res": { - "msg": "Package not found" + "msg": "Package not found", + "rc": 1, + "cmd": ["yum", "install", "-y", "missing-package"], + "stderr": "Error: No matching packages to install", + "stdout": "" }, "task_action": "yum", "duration": 5.2, diff --git a/skills/root-cause-analysis/tests/scripts/test_job_parser.py b/skills/root-cause-analysis/tests/scripts/test_job_parser.py index 3a88160..b47d11e 100644 --- a/skills/root-cause-analysis/tests/scripts/test_job_parser.py +++ b/skills/root-cause-analysis/tests/scripts/test_job_parser.py @@ -202,6 +202,11 @@ def test_extract_failed_tasks_basic(sample_job_data): assert task["error_message"] == "Package not found" assert task["task_action"] == "yum" assert task["duration"] == 5.2 + assert task["rc"] == 1 + assert task["cmd"] == ["yum", "install", "-y", "missing-package"] + assert task["stderr"] == "Error: No matching packages to install" + # stdout in res is empty string, so it should not be included from res + assert "stdout" not in task or task.get("stdout") != "" def test_extract_failed_tasks_ignores_non_failures(): @@ -228,6 +233,77 @@ def test_extract_failed_tasks_string_res(): assert failed[0]["error_message"] == "Simple error string" +def test_extract_failed_tasks_with_diagnostic_fields(): + """Test that stdout, stderr, cmd, and rc are extracted from res.""" + events = [ + { + "event": "runner_on_failed", + "failed": True, + "task": "Git checkout", + "event_data": { + "res": { + "msg": "non-zero return code", + "rc": 1, + "cmd": ["git", "checkout", "cert-manager-fallback"], + "stdout": "", + "stderr": "error: pathspec 'cert-manager-fallback' did not match any file(s) known to git", + }, + }, + } + ] + failed = _extract_failed_tasks(events) + assert len(failed) == 1 + task = failed[0] + assert task["error_message"] == "non-zero return code" + assert task["rc"] == 1 + assert task["cmd"] == ["git", "checkout", "cert-manager-fallback"] + assert task["stderr"] == "error: pathspec 'cert-manager-fallback' did not match any file(s) known to git" + # stdout is empty string in res, so should not be in task_info from res + # but event has no stdout either, so stdout key should be absent + assert "stdout" not in task + + +def test_extract_failed_tasks_event_stdout_fallback(): + """Test fallback to event-level stdout when res has no stdout.""" + events = [ + { + "event": "runner_on_failed", + "failed": True, + "task": "Run script", + "stdout": "TASK [run_script] fatal: host unreachable", + "event_data": { + "res": { + "msg": "Connection timed out", + }, + }, + } + ] + failed = _extract_failed_tasks(events) + assert len(failed) == 1 + task = failed[0] + assert task["stdout"] == "TASK [run_script] fatal: host unreachable" + + +def test_extract_failed_tasks_res_stdout_takes_priority(): + """Test that res.stdout takes priority over event stdout.""" + events = [ + { + "event": "runner_on_failed", + "failed": True, + "task": "Run command", + "stdout": "rendered ansible output", + "event_data": { + "res": { + "msg": "failed", + "stdout": "actual command output from res", + }, + }, + } + ] + failed = _extract_failed_tasks(events) + assert failed[0]["stdout"] == "actual command output from res" + + def test_extract_failed_tasks_no_failures(): """Test that empty list is returned for job with no failures.""" events = [{"event": "runner_on_ok", "task": "OK Task"}] From 489caf854c9703e31973271d62c5beafc430a732 Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Thu, 26 Mar 2026 22:13:46 +0800 Subject: [PATCH 2/3] Handle suppressed res fields by parsing JSON from rendered event.stdout When Ansible suppresses res fields (e.g. _ansible_no_log), stdout/stderr/cmd are missing from the structured result but still embedded as a JSON blob in the ANSI-rendered event.stdout. This adds a fallback parser that extracts structured fields from the rendered output before falling back to raw stdout. Fixes: redhat-et/rhdp-rca-plugin#15 Co-Authored-By: Claude Opus 4.6 (1M context) --- .../root-cause-analysis/scripts/job_parser.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/skills/root-cause-analysis/scripts/job_parser.py b/skills/root-cause-analysis/scripts/job_parser.py index 1b39920..586f52e 100644 --- a/skills/root-cause-analysis/scripts/job_parser.py +++ b/skills/root-cause-analysis/scripts/job_parser.py @@ -156,6 +156,27 @@ def _extract_pod_references(events: list[dict]) -> list[dict[str, str]]: return pod_refs +def _parse_result_from_rendered_output(rendered: str) -> dict[str, Any] | None: + """Parse structured result fields from ANSI-rendered Ansible output. + + When res fields are suppressed (e.g. _ansible_no_log), the diagnostic + data may still be available in the rendered event.stdout as a JSON blob + after 'FAILED! =>' or 'SUCCESS =>'. + """ + # Strip ANSI escape codes + clean = re.sub(r"\x1b\[[0-9;]*m", "", rendered) + # Extract JSON blob after "=>" + match = re.search(r"=>\s*(\{.*\})\s*$", clean, re.DOTALL) + if match: + try: + result = json.loads(match.group(1)) + if isinstance(result, dict): + return result + except (json.JSONDecodeError, ValueError): + pass + return None + + def _extract_failed_tasks(events: list[dict]) -> list[dict[str, Any]]: """Extract failed tasks with error details.""" failed = [] @@ -183,11 +204,20 @@ def _extract_failed_tasks(events: list[dict]) -> list[dict[str, Any]]: if value is not None and value != "": task_info[field] = value - # Fallback: capture event-level stdout when res fields are missing - if "stdout" not in task_info: + # Fallback: parse structured fields from rendered event.stdout + # when res fields are suppressed (e.g. _ansible_no_log) + if "stdout" not in task_info or "stderr" not in task_info: event_stdout = event.get("stdout", "") if event_stdout: - task_info["stdout"] = event_stdout + parsed = _parse_result_from_rendered_output(event_stdout) + if parsed: + for field in ("stdout", "stderr", "cmd"): + if field not in task_info and field in parsed: + task_info[field] = parsed[field] + + # Final fallback: raw event stdout if nothing was parsed + if "stdout" not in task_info: + task_info["stdout"] = event_stdout failed.append(task_info) From aa79fcb9707e7454dd0d0c3e5343113140b6f9f5 Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Thu, 26 Mar 2026 22:28:51 +0800 Subject: [PATCH 3/3] Fix ruff formatting in test_job_parser.py Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/root-cause-analysis/tests/scripts/test_job_parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skills/root-cause-analysis/tests/scripts/test_job_parser.py b/skills/root-cause-analysis/tests/scripts/test_job_parser.py index b47d11e..ac0cd88 100644 --- a/skills/root-cause-analysis/tests/scripts/test_job_parser.py +++ b/skills/root-cause-analysis/tests/scripts/test_job_parser.py @@ -257,7 +257,10 @@ def test_extract_failed_tasks_with_diagnostic_fields(): assert task["error_message"] == "non-zero return code" assert task["rc"] == 1 assert task["cmd"] == ["git", "checkout", "cert-manager-fallback"] - assert task["stderr"] == "error: pathspec 'cert-manager-fallback' did not match any file(s) known to git" + assert ( + task["stderr"] + == "error: pathspec 'cert-manager-fallback' did not match any file(s) known to git" + ) # stdout is empty string in res, so should not be in task_info from res # but event has no stdout either, so stdout key should be absent assert "stdout" not in task