[Backend Tester] Add CSV report generation #12741

Open · wants to merge 11 commits into base: gh/GregoryComer/87/head
5 changes: 4 additions & 1 deletion backends/test/suite/context.py
@@ -1,8 +1,11 @@
 # Test run context management. This is used to determine the test context for reporting
 # purposes.
 class TestContext:
-    def __init__(self, test_name: str, flow_name: str, params: dict | None):
+    def __init__(
+        self, test_name: str, test_base_name: str, flow_name: str, params: dict | None
+    ):
         self.test_name = test_name
+        self.test_base_name = test_base_name
         self.flow_name = flow_name
         self.params = params
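For reference, a minimal sketch of how the widened constructor is used as a context manager, mirroring the call sites updated below; the literal values here are illustrative, not taken from the PR:

import torch

from executorch.backends.test.suite.context import TestContext

with TestContext(
    test_name="test_foo_flow1_float32",  # full name, including flow/param suffixes
    test_base_name="test_foo",           # new: base name without suffixes
    flow_name="flow1",
    params={"dtype": torch.float32},
):
    ...  # run the test body under this reporting context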
13 changes: 7 additions & 6 deletions backends/test/suite/models/__init__.py
@@ -42,19 +42,19 @@ def _create_test(
     dtype: torch.dtype,
     use_dynamic_shapes: bool,
 ):
-    dtype_name = str(dtype)[6:]  # strip "torch."
-    test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
-    if use_dynamic_shapes:
-        test_name += "_dynamic_shape"
-
     def wrapped_test(self):
         params = {
             "dtype": dtype,
             "use_dynamic_shapes": use_dynamic_shapes,
         }
-        with TestContext(test_name, flow.name, params):
+        with TestContext(test_name, test_func.__name__, flow.name, params):
             test_func(self, flow, dtype, use_dynamic_shapes)

+    dtype_name = str(dtype)[6:]  # strip "torch."
+    test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}"
+    if use_dynamic_shapes:
+        test_name += "_dynamic_shape"
+
     wrapped_test._name = test_func.__name__  # type: ignore
     wrapped_test._flow = flow  # type: ignore

@@ -118,6 +118,7 @@ def run_model_test(
         inputs,
         flow,
         context.test_name,
+        context.test_base_name,
         context.params,
         dynamic_shapes=dynamic_shapes,
     )
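Moving the test_name computation below wrapped_test is safe: Python closures resolve free variables when the inner function is called, not when it is defined. A minimal standalone demonstration:

def make():
    def inner():
        return label  # looked up in the enclosing scope at call time

    label = "bound after inner was defined"
    return inner

print(make()())  # prints: bound after inner was defined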
16 changes: 11 additions & 5 deletions backends/test/suite/operators/__init__.py
@@ -6,6 +6,7 @@

 # pyre-unsafe

+import copy
 import os
 import unittest

@@ -90,12 +91,13 @@ def _expand_test(cls, test_name: str):
 def _make_wrapped_test(
     test_func: Callable,
     test_name: str,
+    test_base_name: str,
     flow: TestFlow,
     params: dict | None = None,
 ):
     def wrapped_test(self):
-        with TestContext(test_name, flow.name, params):
-            test_kwargs = params or {}
+        with TestContext(test_name, test_base_name, flow.name, params):
+            test_kwargs = copy.copy(params) or {}
             test_kwargs["flow"] = flow

             test_func(self, **test_kwargs)

@@ -114,19 +116,22 @@ def _create_test_for_backend(
     test_type = getattr(test_func, "test_type", TestType.STANDARD)

     if test_type == TestType.STANDARD:
-        wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow)
         test_name = f"{test_func.__name__}_{flow.name}"
+        wrapped_test = _make_wrapped_test(
+            test_func, test_name, test_func.__name__, flow
+        )
         setattr(cls, test_name, wrapped_test)
     elif test_type == TestType.DTYPE:
         for dtype in DTYPES:
+            dtype_name = str(dtype)[6:]  # strip "torch."
+            test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}"
             wrapped_test = _make_wrapped_test(
                 test_func,
                 test_name,
+                test_func.__name__,
                 flow,
                 {"dtype": dtype},
             )
-            dtype_name = str(dtype)[6:]  # strip "torch."
-            test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}"
             setattr(cls, test_name, wrapped_test)
     else:
         raise NotImplementedError(f"Unknown test type {test_type}.")

@@ -146,6 +151,7 @@ def _test_op(
         inputs,
         flow,
         context.test_name,
+        context.test_base_name,
         context.params,
         generate_random_test_inputs=generate_random_test_inputs,
     )
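The switch from test_kwargs = params to copy.copy(params) matters because the same params dict is shared by every run of a parameterized test; writing the "flow" key into an alias would leak state between tests. A minimal repro of that hazard, with hypothetical names:

shared_params = {"dtype": "float32"}   # one dict reused across generated tests

def run_once(params):
    kwargs = params            # alias, not a copy (the old behavior)
    kwargs["flow"] = "flow1"   # mutates the caller's dict in place

run_once(shared_params)
print(shared_params)  # {'dtype': 'float32', 'flow': 'flow1'}: leaked key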
53 changes: 51 additions & 2 deletions backends/test/suite/reporting.py
@@ -1,6 +1,9 @@
+import csv
 from collections import Counter
 from dataclasses import dataclass
 from enum import IntEnum
+from functools import reduce
+from typing import TextIO


 class TestResult(IntEnum):

@@ -76,12 +79,18 @@ class TestCaseSummary:
     Contains summary results for the execution of a single test case.
     """

-    name: str
-    """ The qualified name of the test, not including the flow suffix. """
     backend: str
     """ The name of the target backend. """

+    base_name: str
+    """ The base name of the test, not including flow or parameter suffixes. """
+
     flow: str
     """ The backend-specific flow name. Corresponds to flows registered in backends/test/suite/__init__.py. """

+    name: str
+    """ The full name of the test, including flow and parameter suffixes. """
+
     params: dict | None
     """ Test-specific parameters, such as dtype. """

@@ -162,3 +171,43 @@ def complete_test_session() -> RunSummary:
     _active_session = None

     return summary
+
+
+def generate_csv_report(summary: RunSummary, output: TextIO):
+    """Write a run summary report to a file in CSV format."""
+
+    field_names = [
+        "Test ID",
+        "Test Case",
+        "Backend",
+        "Flow",
+        "Result",
+    ]
+
+    # Tests can have custom parameters. We'll want to report them here, so we need
+    # a list of all unique parameter names.
+    param_names = reduce(
+        lambda a, b: a.union(b),
+        (
+            set(s.params.keys())
+            for s in summary.test_case_summaries
+            if s.params is not None
+        ),
+        set(),
+    )
+    field_names += (s.capitalize() for s in param_names)
+
+    writer = csv.DictWriter(output, field_names)
+    writer.writeheader()
+
+    for record in summary.test_case_summaries:
+        row = {
+            "Test ID": record.name,
+            "Test Case": record.base_name,
+            "Backend": record.backend,
+            "Flow": record.flow,
+            "Result": record.result.display_name(),
+        }
+        if record.params is not None:
+            row.update({k.capitalize(): v for k, v in record.params.items()})
+        writer.writerow(row)
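Given the fixture data added in test_reporting.py below, the generated report would look roughly like this (the parameter columns are collected from an unordered set, so "Dtype" and "Use_dynamic_shapes" can appear in either order):

Test ID,Test Case,Backend,Flow,Result,Dtype,Use_dynamic_shapes
test1_backend1_flow1,test1,backend1,flow1,Success (Delegated),,
test1_backend2_flow1,test1,backend2,flow1,Fail (Lowering),,
test2_backend1_flow1,test2,backend1,flow1,Success (Undelegated),torch.float32,
test2_backend2_flow1,test2,backend2,flow1,Fail (Export),,True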
17 changes: 16 additions & 1 deletion backends/test/suite/runner.py
@@ -13,6 +13,7 @@
 from executorch.backends.test.suite.reporting import (
     begin_test_session,
     complete_test_session,
+    generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,

@@ -31,6 +32,7 @@ def run_test(  # noqa: C901
     inputs: Any,
     flow: TestFlow,
     test_name: str,
+    test_base_name: str,
     params: dict | None,
     dynamic_shapes: Any | None = None,
     generate_random_test_inputs: bool = True,

@@ -45,8 +47,10 @@ def build_result(
         result: TestResult, error: Exception | None = None
     ) -> TestCaseSummary:
         return TestCaseSummary(
-            name=test_name,
             backend=flow.backend,
+            base_name=test_base_name,
             flow=flow.name,
+            name=test_name,
             params=params,
             result=result,
             error=error,

@@ -171,6 +175,12 @@ def parse_args():
     parser.add_argument(
         "-f", "--filter", nargs="?", help="A regular expression filter for test names."
     )
+    parser.add_argument(
+        "-r",
+        "--report",
+        nargs="?",
+        help="A file to write the test report to, in CSV format.",
+    )
     return parser.parse_args()

@@ -199,6 +209,11 @@ def runner_main():
     summary = complete_test_session()
     print_summary(summary)

+    if args.report is not None:
+        with open(args.report, "w") as f:
+            print(f"Writing CSV report to {args.report}.")
+            generate_csv_report(summary, f)


 if __name__ == "__main__":
     runner_main()
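Assuming the suite is run as a module (the exact entry point is not shown in this diff), usage of the new flag would look something like:

python -m executorch.backends.test.suite.runner --filter "test_add.*" --report results.csv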
3 changes: 3 additions & 0 deletions backends/test/suite/tests/README.md
@@ -0,0 +1,3 @@
+# Tests
+
+This directory contains meta-tests for the backend test suite. As the test suite contains a non-negligible amount of logic, these tests help ensure that the test suite itself is working correctly.
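One plausible way to run these meta-tests, assuming the repository's standard unittest layout (the PR itself does not specify a command):

python -m unittest executorch.backends.test.suite.tests.test_reporting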
Empty file.
106 changes: 106 additions & 0 deletions backends/test/suite/tests/test_reporting.py
@@ -0,0 +1,106 @@
+import unittest
+
+from csv import DictReader
+from io import StringIO
+
+import torch
+
+from ..reporting import (
+    generate_csv_report,
+    RunSummary,
+    TestCaseSummary,
+    TestResult,
+    TestSessionState,
+)
+
+# Test data for simulated test results.
+TEST_CASE_SUMMARIES = [
+    TestCaseSummary(
+        backend="backend1",
+        base_name="test1",
+        flow="flow1",
+        name="test1_backend1_flow1",
+        params=None,
+        result=TestResult.SUCCESS,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend2",
+        base_name="test1",
+        flow="flow1",
+        name="test1_backend2_flow1",
+        params=None,
+        result=TestResult.LOWER_FAIL,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend1",
+        base_name="test2",
+        flow="flow1",
+        name="test2_backend1_flow1",
+        params={"dtype": torch.float32},
+        result=TestResult.SUCCESS_UNDELEGATED,
+        error=None,
+    ),
+    TestCaseSummary(
+        backend="backend2",
+        base_name="test2",
+        flow="flow1",
+        name="test2_backend2_flow1",
+        params={"use_dynamic_shapes": True},
+        result=TestResult.EXPORT_FAIL,
+        error=None,
+    ),
+]
+
+
+class Reporting(unittest.TestCase):
+    def test_csv_report_simple(self):
[Inline review comment]
Contributor: Nit: tsv works better usually
Member (Author): I don't have a strong opinion on this, personally. Let's chat tomorrow. I'm good to switch it to TSV if you think it would be better.
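If the report ever does switch to TSV, the change would be small, since csv.DictWriter accepts a delimiter argument (a sketch, not part of this diff):

writer = csv.DictWriter(output, field_names, delimiter="\t")  # TSV variant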

+        # Verify the format of a simple CSV run report.
+        session_state = TestSessionState()
+        session_state.test_case_summaries.extend(TEST_CASE_SUMMARIES)
+        run_summary = RunSummary.from_session(session_state)
+
+        strio = StringIO()
+        generate_csv_report(run_summary, strio)
+
+        # Attempt to deserialize and validate the CSV report.
+        report = DictReader(StringIO(strio.getvalue()))
+        records = list(report)
+        self.assertEqual(len(records), 4)
+
+        # Validate first record: test1, backend1, SUCCESS
+        self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1")
+        self.assertEqual(records[0]["Test Case"], "test1")
+        self.assertEqual(records[0]["Backend"], "backend1")
+        self.assertEqual(records[0]["Flow"], "flow1")
+        self.assertEqual(records[0]["Result"], "Success (Delegated)")
+        self.assertEqual(records[0]["Dtype"], "")
+        self.assertEqual(records[0]["Use_dynamic_shapes"], "")
+
+        # Validate second record: test1, backend2, LOWER_FAIL
+        self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
+        self.assertEqual(records[1]["Test Case"], "test1")
+        self.assertEqual(records[1]["Backend"], "backend2")
+        self.assertEqual(records[1]["Flow"], "flow1")
+        self.assertEqual(records[1]["Result"], "Fail (Lowering)")
+        self.assertEqual(records[1]["Dtype"], "")
+        self.assertEqual(records[1]["Use_dynamic_shapes"], "")
+
+        # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
+        self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
+        self.assertEqual(records[2]["Test Case"], "test2")
+        self.assertEqual(records[2]["Backend"], "backend1")
+        self.assertEqual(records[2]["Flow"], "flow1")
+        self.assertEqual(records[2]["Result"], "Success (Undelegated)")
+        self.assertEqual(records[2]["Dtype"], str(torch.float32))
+        self.assertEqual(records[2]["Use_dynamic_shapes"], "")
+
+        # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
+        self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
+        self.assertEqual(records[3]["Test Case"], "test2")
+        self.assertEqual(records[3]["Backend"], "backend2")
+        self.assertEqual(records[3]["Flow"], "flow1")
+        self.assertEqual(records[3]["Result"], "Fail (Export)")
+        self.assertEqual(records[3]["Dtype"], "")
+        self.assertEqual(records[3]["Use_dynamic_shapes"], "True")