diff --git a/.github/workflows/_test_backend.yml b/.github/workflows/_test_backend.yml new file mode 100644 index 00000000000..8b572825bdf --- /dev/null +++ b/.github/workflows/_test_backend.yml @@ -0,0 +1,79 @@ +name: Test Backend + +on: + workflow_call: + inputs: + backend: + description: 'Backend to test (xnnpack, coreml, vulkan, qnn)' + required: true + type: string + flows: + description: 'JSON array of flows to test' + required: true + type: string + ref: + description: 'Git ref to checkout' + required: false + type: string + default: ${{ github.sha }} + timeout: + description: 'Job timeout in minutes' + required: false + type: number + default: 120 + run-linux: + description: 'Whether to run Linux tests' + required: false + type: boolean + default: false + run-macos: + description: 'Whether to run macOS tests' + required: false + type: boolean + default: false + +jobs: + test-backend-linux: + if: ${{ inputs.run-linux }} + strategy: + fail-fast: false + matrix: + flow: ${{ fromJSON(inputs.flows) }} + suite: [models, operators] + + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + with: + ref: ${{ inputs.ref }} + runner: linux.4xlarge.memory + docker-image: ci-image:executorch-ubuntu-22.04-clang12 + submodules: recursive + timeout: ${{ inputs.timeout }} + upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} + script: | + set -eux + + source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" + + test-backend-macos: + if: ${{ inputs.run-macos }} + strategy: + fail-fast: false + matrix: + flow: ${{ fromJSON(inputs.flows) }} + suite: [models, operators] + + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + with: + ref: ${{ inputs.ref }} + runner: macos-m1-stable + python-version: "3.12" + submodules: recursive + timeout: ${{ inputs.timeout }} + upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} + script: | + set -eux + + # This is needed to get the 
prebuilt PyTorch wheel from S3 + ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 + + source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index c220b371c0a..4658fdc0d26 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -36,51 +36,3 @@ jobs: uses: ./.github/workflows/_link_check.yml with: ref: ${{ github.sha }} - - backend-test-linux: - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - strategy: - fail-fast: false - matrix: - flow: [ - qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w, - vulkan, vulkan_static_int8_per_channel, - xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor - ] - suite: [models, operators] - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - runner: linux.4xlarge.memory - docker-image: ci-image:executorch-ubuntu-22.04-clang12 - submodules: recursive - timeout: 120 - upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} - script: | - set -eux - - source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" - - backend-test-macos: - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - flow: [coreml, coreml_static_int8] - suite: [models, operators] - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - runner: macos-m1-stable - python-version: 3.12 - submodules: recursive - timeout: 120 - upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} - script: | - set -eux - - # This is needed to get the prebuilt PyTorch wheel from S3 - ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 - - source 
.ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml new file mode 100644 index 00000000000..c6970ddff61 --- /dev/null +++ b/.github/workflows/test-backend-coreml.yml @@ -0,0 +1,27 @@ +name: Test CoreML Backend + +on: + schedule: + - cron: 0 2 * * * + push: + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-coreml.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-coreml: + uses: ./.github/workflows/_test_backend.yml + with: + backend: coreml + flows: '["coreml", "coreml_static_int8"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-macos: true diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml new file mode 100644 index 00000000000..64dc7cdce36 --- /dev/null +++ b/.github/workflows/test-backend-qnn.yml @@ -0,0 +1,27 @@ +name: Test QNN Backend + +on: + schedule: + - cron: 0 2 * * * + push: + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-qnn.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-qnn: + uses: ./.github/workflows/_test_backend.yml + with: + backend: qnn + flows: '["qnn", "qnn_16a16w", "qnn_16a8w", "qnn_16a4w", "qnn_16a4w_block", "qnn_8a8w"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git 
a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml new file mode 100644 index 00000000000..f04fdcdd1f1 --- /dev/null +++ b/.github/workflows/test-backend-vulkan.yml @@ -0,0 +1,27 @@ +name: Test Vulkan Backend + +on: + schedule: + - cron: 0 2 * * * + push: + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-vulkan.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-vulkan: + uses: ./.github/workflows/_test_backend.yml + with: + backend: vulkan + flows: '["vulkan", "vulkan_static_int8_per_channel"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml new file mode 100644 index 00000000000..2ae423dd99b --- /dev/null +++ b/.github/workflows/test-backend-xnnpack.yml @@ -0,0 +1,27 @@ +name: Test XNNPACK Backend + +on: + schedule: + - cron: 0 2 * * * + push: + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-xnnpack.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-xnnpack: + uses: ./.github/workflows/_test_backend.yml + with: + backend: xnnpack + flows: '["xnnpack", "xnnpack_dynamic_int8_per_channel", "xnnpack_static_int8_per_channel", "xnnpack_static_int8_per_tensor"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py 
index b7a126eaf35..9df3805444a 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Callable from executorch.backends.test.harness import Tester @@ -35,6 +35,12 @@ class TestFlow: is_delegated: bool = True """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """ + skip_patterns: list[str] = field(default_factory=lambda: []) + """ Tests with names containing any substrings in this list are skipped. """ + + def should_skip_test(self, test_name: str) -> bool: + return any(pattern in test_name for pattern in self.skip_patterns) + def all_flows() -> dict[str, TestFlow]: flows = [] diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py index fd956b64f05..8a532ff0003 100644 --- a/backends/test/suite/flows/coreml.py +++ b/backends/test/suite/flows/coreml.py @@ -19,6 +19,7 @@ def _create_coreml_flow( CoreMLTester, minimum_deployment_target=minimum_deployment_target ), quantize=quantize, + skip_patterns=["test_argmin", "test_argmax"], ) diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py index 2a8c4e506fa..a3a4fb55aba 100644 --- a/backends/test/suite/flows/vulkan.py +++ b/backends/test/suite/flows/vulkan.py @@ -20,6 +20,7 @@ def _create_vulkan_flow_base( tester_factory=VulkanTester, quantize=quantize_stage_factory is not None, quantize_stage_factory=quantize_stage_factory, + skip_patterns=["float16", "float64"], # Not supported in swiftshader ) diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py index 37bf758fed0..73da8fba678 100644 --- a/backends/test/suite/generate_markdown_summary.py +++ b/backends/test/suite/generate_markdown_summary.py @@ -12,6 +12,25 @@ # +def escape_for_markdown(text: str) -> str: + """ + Modify a string to properly display in a markdown table cell. 
+ """ + if not text: + return text + + # Replace newlines with
<br /> tags + escaped = text.replace("\n", "<br />
") + + # Escape backslashes. + escaped = escaped.replace("\\", "\\\\") + + # Escape pipe characters that would break table structure + escaped = escaped.replace("|", "\\|") + + return escaped + + def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) # Print warning if exit code is non-zero if exit_code != 0: @@ -46,7 +65,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) for row in data_rows: # Make a copy of the row to avoid modifying the original - processed_row = row.copy() + processed_row = [escape_for_markdown(cell) for cell in row] # Count results and collect failed tests if result_column_index is not None and result_column_index < len(row): @@ -96,7 +115,8 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) # Generate Failed Tests section print("# Failed Tests\n") if failed_tests: - print("| " + " | ".join(header) + " |") + escaped_header = [escape_for_markdown(col) for col in header] + print("| " + " | ".join(escaped_header) + " |") print("|" + "|".join(["---"] * len(header)) + "|") for row in failed_tests: print("| " + " | ".join(row) + " |") diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 65b546b0eb5..ea44275a463 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -52,6 +52,11 @@ def wrapped_test(self): "use_dynamic_shapes": use_dynamic_shapes, } with TestContext(test_name, test_func.__name__, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_func(self, flow, dtype, use_dynamic_shapes) wrapped_test._name = test_func.__name__ # type: ignore diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 6ceb9086f71..9c550b3a49c 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -97,6 
+97,11 @@ def _make_wrapped_test( ): def wrapped_test(self): with TestContext(test_name, test_base_name, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_kwargs = copy.copy(params) or {} test_kwargs["flow"] = flow diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index ce8a48dcc12..cdf2ce870e1 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -45,6 +45,8 @@ ] ) +CSV_FIELD_NAMES.append("Error") + # Operators that are excluded from the counts returned by count_ops. These are used to # exclude operatations that are not logically relevant or delegatable to backends. @@ -365,6 +367,15 @@ def write_csv_header(output: TextIO): def write_csv_row(record: TestCaseSummary, output: TextIO): writer = csv.DictWriter(output, CSV_FIELD_NAMES) + # Truncate error message if it's too long, keeping first and last 200 characters + error_message = "" + if record.error is not None: + error_str = str(record.error) + if len(error_str) > 400: + error_message = error_str[:200] + "..." + error_str[-200:] + else: + error_message = error_str + row = { "Test ID": record.name, "Test Case": record.base_name, @@ -373,6 +384,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO): "Params": _serialize_params(record.params), "Result": record.result.to_short_str(), "Result Detail": record.result.to_detail_str(), + "Error": error_message, "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( f"{record.quantize_time.total_seconds():.3f}"