diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml
index e57be2704a2..bee74fee172 100644
--- a/.github/workflows/test-backend-arm.yml
+++ b/.github/workflows/test-backend-arm.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml
index c6970ddff61..247f9576595 100644
--- a/.github/workflows/test-backend-coreml.yml
+++ b/.github/workflows/test-backend-coreml.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml
index 00933d6c74e..907c4d2dac0 100644
--- a/.github/workflows/test-backend-qnn.yml
+++ b/.github/workflows/test-backend-qnn.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml
index f04fdcdd1f1..cb2478fc825 100644
--- a/.github/workflows/test-backend-vulkan.yml
+++ b/.github/workflows/test-backend-vulkan.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
  push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml
index 2ae423dd99b..086c9625a38 100644
--- a/.github/workflows/test-backend-xnnpack.yml
+++ b/.github/workflows/test-backend-xnnpack.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 73da8fba678..e54fc691723 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -1,44 +1,69 @@
 import argparse
 import csv
+import json
 import sys
 
-#
-# A standalone script to generate a Markdown representation of a test report.
-# This is primarily intended to be used with GitHub actions to generate a nice
-# representation of the test results when looking at the action run.
-#
-# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
-# Markdown is written to stdout.
-#
+from dataclasses import dataclass, field
 
 
-def escape_for_markdown(text: str) -> str:
+@dataclass
+class ResultCounts:
     """
-    Modify a string to properly display in a markdown table cell.
+    Represents aggregated result counts for each status.
     """
-    if not text:
-        return text
 
-    # Replace newlines with <br/> tags
-    escaped = text.replace("\n", "<br/>")
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
 
-    # Escape backslashes.
-    escaped = escaped.replace("\\", "\\\\")
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
 
-    # Escape pipe characters that would break table structure
-    escaped = escaped.replace("|", "\\|")
+        self.total += 1
 
-    return escaped
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+    header: list[str]
+
+
+#
+# A standalone script to generate a Markdown representation of a test report.
+# This is primarily intended to be used with GitHub actions to generate a nice
+# representation of the test results when looking at the action run.
+#
+# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
+# Markdown is written to stdout.
+#
 
-def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
-    # Print warning if exit code is non-zero
-    if exit_code != 0:
-        print("> [!WARNING]")
-        print(
-            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
-        )
+def aggregate_results(csv_path: str) -> AggregatedSummary:
     with open(csv_path, newline="", encoding="utf-8") as f:
         reader = csv.reader(f)
         rows = list(reader)
@@ -46,24 +71,28 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     header = rows[0]
     data_rows = rows[1:]
 
-    # Find the Result and Result Detail column indices
-    result_column_index = None
-    result_detail_column_index = None
-    for i, col in enumerate(header):
-        if col.lower() == "result":
-            result_column_index = i
-        elif col.lower() == "result detail":
-            result_detail_column_index = i
+    header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
+    params_column_index = header_indices_by_name.get("params", None)
+    result_column_index = header_indices_by_name["result"]
+    result_detail_column_index = header_indices_by_name["result detail"]
 
     # Count results and prepare data
-    pass_count = 0
-    fail_count = 0
-    skip_count = 0
+    counts = ResultCounts()
     failed_tests = []
-    processed_rows = []
-    result_detail_counts = {}
+    counts_by_param = {}
 
     for row in data_rows:
+        result = row[result_column_index]
+        result_detail = row[result_detail_column_index]
+
+        counts.add_row(result, result_detail)
+
+        params = row[params_column_index] if params_column_index else None
+        if params:
+            if params not in counts_by_param:
+                counts_by_param[params] = ResultCounts()
+            counts_by_param[params].add_row(result, result_detail)
+
         # Make a copy of the row to avoid modifying the original
        processed_row = [escape_for_markdown(cell) for cell in row]
@@ -71,54 +100,130 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
         if result_column_index is not None and result_column_index < len(row):
             result_value = row[result_column_index].strip().lower()
             if result_value == "pass":
-                pass_count += 1
                 processed_row[result_column_index] = (
                     'Pass'
                 )
             elif result_value == "fail":
-                fail_count += 1
                 processed_row[result_column_index] = (
                     'Fail'
                 )
                 failed_tests.append(processed_row.copy())
             elif result_value == "skip":
-                skip_count += 1
                 processed_row[result_column_index] = (
                     'Skip'
                 )
 
-        # Count result details (excluding empty ones)
-        if result_detail_column_index is not None and result_detail_column_index < len(
-            row
-        ):
-            result_detail_value = row[result_detail_column_index].strip()
-            if result_detail_value:  # Only count non-empty result details
-                if result_detail_value in result_detail_counts:
-                    result_detail_counts[result_detail_value] += 1
-                else:
-                    result_detail_counts[result_detail_value] = 1
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+        header=header,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
+
+    # Replace newlines with <br/> tags
+    escaped = text.replace("\n", "<br/>")
 
-        processed_rows.append(processed_row)
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
+
+    results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = len(data_rows)
     print("# Summary\n")
-    print(f"- **Pass**: {pass_count}/{total_rows}")
-    print(f"- **Fail**: {fail_count}/{total_rows}")
-    print(f"- **Skip**: {skip_count}/{total_rows}")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / total_excluding_skips
+    fail_fraction = results.counts.fails / total_excluding_skips
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params and len(parsed_params) > 1:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / (counts.passes + counts.fails)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+        print()
 
     print("## Failure Breakdown:")
-    total_rows_with_result_detail = sum(result_detail_counts.values())
-    for detail, count in sorted(result_detail_counts.items()):
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
         print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
 
     # Generate Failed Tests section
     print("# Failed Tests\n")
-    if failed_tests:
-        escaped_header = [escape_for_markdown(col) for col in header]
+    if results.failed_tests:
+        escaped_header = [escape_for_markdown(col) for col in results.header]
         print("| " + " | ".join(escaped_header) + " |")
-        print("|" + "|".join(["---"] * len(header)) + "|")
-        for row in failed_tests:
+        print("|" + "|".join(["---"] * len(results.header)) + "|")
+        for row in results.failed_tests:
             print("| " + " | ".join(row) + " |")
     else:
         print("No failed tests.\n")
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index cdf2ce870e1..09e950ab672 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -1,4 +1,5 @@
 import csv
+import json
 
 from collections import Counter
 from dataclasses import dataclass, field
@@ -343,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
 
 def _serialize_params(params: dict[str, Any] | None) -> str:
     if params is not None:
-        return str(dict(sorted(params.items())))
+        # Convert values to strings - JSON conversion doesn't like dtypes.
+        str_params = {k: str(v) for k, v in params.items()}
+        return json.dumps(str_params)
     else:
         return ""
 
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index eeea09e0fc1..a6d7d07bce0 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -57,7 +57,7 @@ def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bo
             and node.target == exir_ops.edge.aten.convolution.default
         ):
             in_rank = node.args[0].meta["val"].dim()
-            if in_rank != 4:
+            if in_rank > 4:
                 return True
 
     return False
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
index 58ff76cba17..e42681fc678 100644
--- a/backends/test/suite/tests/test_reporting.py
+++ b/backends/test/suite/tests/test_reporting.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 
 from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
         self.assertEqual(records[2]["Test Case"], "test2")
         self.assertEqual(records[2]["Flow"], "flow1")
         self.assertEqual(records[2]["Result"], "Pass")
-        self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
+        self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))
 
         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
         self.assertEqual(records[3]["Flow"], "flow1")
         self.assertEqual(records[3]["Result"], "Skip")
-        self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
+        self.assertEqual(
+            records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
+        )
 
     def test_count_ops(self):
         """
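
Reviewer note (illustrative only, not part of the patch): the sketch below shows how the pieces above fit together after this change — `_serialize_params` now writes the `Params` CSV cell as a JSON object, `aggregate_results()` tallies per-parameter `ResultCounts`, and `generate_markdown()` renders the summary. The module path, column order, and sample values are assumptions made for the example; the refactored script itself only requires the `Result`, `Result Detail`, and (optionally) `Params` columns.

```python
# Illustrative sketch only -- not part of this diff. Assumes the refactored
# module is importable as executorch.backends.test.suite.generate_markdown_summary.
import csv
import json
import tempfile

from executorch.backends.test.suite.generate_markdown_summary import (
    aggregate_results,
    generate_markdown,
)

# Hand-written report rows. After this change, "Params" holds a JSON object
# (as produced by _serialize_params) rather than a str()-formatted dict.
rows = [
    ["Test ID", "Test Case", "Flow", "Params", "Result", "Result Detail"],
    ["test1_b1_flow1", "test1", "flow1", json.dumps({"dtype": "torch.float32"}), "Pass", ""],
    ["test2_b1_flow1", "test2", "flow1", json.dumps({"dtype": "torch.float16"}), "Fail", "Eager failure"],
]

with tempfile.NamedTemporaryFile("w", suffix=".csv", newline="", delete=False) as f:
    csv.writer(f).writerows(rows)
    report_path = f.name

summary = aggregate_results(report_path)
print(summary.counts.passes, summary.counts.fails, summary.counts.skips)  # 1 1 0

generate_markdown(report_path)  # Renders the Markdown report to stdout, as CI does.
```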