diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index ff827ad3f6..0c92e207bb 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -1,7 +1,7 @@ name: Build and test Linux aarch64 wheels for Jetpack on: - pull_request: + #pull_request: push: branches: - main diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 2604d18f92..fc3441ffd2 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -1,7 +1,7 @@ name: Build and test Linux aarch64 wheels on: - pull_request: + #pull_request: push: branches: - main diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 6d94546177..5f95f7d209 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -1,7 +1,7 @@ name: Build and test Linux x86_64 wheels on: - pull_request: + #pull_request: push: branches: - main diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index 34f9d00568..469b8278aa 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -1,7 +1,7 @@ name: RTX - Build and test Linux x86_64 wheels on: - pull_request: + #pull_request: push: branches: - main diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index c62515cec4..bed72e7274 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -85,69 +85,6 @@ jobs: trigger-event: ${{ github.event_name }} timeout: 120 - tests-py-torchscript-fe: - name: Test torchscript frontend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-torchscript-fe - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/modules - python hub.py - popd - pushd . - cd tests/py/ts - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ - popd - - tests-py-dynamo-converters: - name: Test dynamo converters [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-converters - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - nvidia-smi - nvcc --version - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ - popd - tests-py-dynamo-fe: name: Test dynamo frontend [Python] needs: [substitute-runner, build] @@ -167,160 +104,264 @@ jobs: build-matrix: ${{ needs.substitute-runner.outputs.matrix }} pre-script: packaging/driver_upgrade.bat script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/ - popd - tests-py-dynamo-serde: - name: Test dynamo export serde [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-serde - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail + #set -euo pipefail + set -x export USE_HOST_DEPS=1 export CI_BUILD=1 pushd . cd tests/py cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py - popd - tests-py-torch-compile-be: - name: Test torch compile backend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-torch-compile-be - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + gdb --version + nvidia-smi + nvcc --version + python -m pytest -k "lan_1" -rP models/test_models.py + echo "lan added finished lan_1" + python -m pytest -k "lan_2" -rP models/test_models.py + echo "lan added finished lan_2" + python -m pytest -k "lan_3" -rP models/test_models.py + echo "lan added finished lan_3" popd - tests-py-dynamo-core: - name: Test dynamo core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - popd - tests-py-dynamo-cudagraphs: - name: Test dynamo cudagraphs [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-cudagraphs - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py - popd + # tests-py-torchscript-fe: + # name: Test torchscript frontend [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-torchscript-fe + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/modules + # python hub.py + # popd + # pushd . + # cd tests/py/ts + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + # popd - tests-py-core: - name: Test core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py/core - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . - popd + # tests-py-dynamo-converters: + # name: Test dynamo converters [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-converters + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # nvidia-smi + # nvcc --version + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + # popd + + # tests-py-dynamo-fe: + # name: Test dynamo frontend [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-fe + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/ + # popd + + # tests-py-dynamo-serde: + # name: Test dynamo export serde [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-serde + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + # popd + + # tests-py-torch-compile-be: + # name: Test torch compile backend [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-torch-compile-be + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + # ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + # popd + + # tests-py-dynamo-core: + # name: Test dynamo core [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-core + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + # ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + # popd + + # tests-py-dynamo-cudagraphs: + # name: Test dynamo cudagraphs [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-cudagraphs + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py + # popd + + # tests-py-core: + # name: Test core [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-core + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/core + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + # popd concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index 9ee768b964..4bd08deafd 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -1,7 +1,7 @@ name: RTX - Build and test Windows wheels on: - pull_request: + #pull_request: push: branches: - main @@ -87,34 +87,34 @@ jobs: use-rtx: true timeout: 120 - tests-py-dynamo-converters: - name: Test dynamo converters [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-converters - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ - popd + # tests-py-dynamo-converters: + # name: Test dynamo converters [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-converters + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + # popd tests-py-dynamo-fe: name: Test dynamo frontend [Python] @@ -146,155 +146,155 @@ jobs: python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_llm.xml llm/ popd - tests-py-dynamo-serde: - name: Test dynamo export serde [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-serde - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py - popd + # tests-py-dynamo-serde: + # name: Test dynamo export serde [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-serde + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + # popd - tests-py-torch-compile-be: - name: Test torch compile backend [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-torch-compile-be - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py - ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py - popd + # tests-py-torch-compile-be: + # name: Test torch compile backend [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-torch-compile-be + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + # ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + # popd - tests-py-dynamo-core: - name: Test dynamo core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ - ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ - popd + # tests-py-dynamo-core: + # name: Test dynamo core [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-core + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + # ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + # popd - tests-py-dynamo-cudagraphs: - name: Test dynamo cudagraphs [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-dynamo-cudagraphs - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py - cd dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py - popd + # tests-py-dynamo-cudagraphs: + # name: Test dynamo cudagraphs [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-dynamo-cudagraphs + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py + # cd dynamo + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py + # python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py + # popd - tests-py-core: - name: Test core [Python] - needs: [substitute-runner, build] - strategy: - fail-fast: false - matrix: - include: - - repository: pytorch/tensorrt - package-name: torch_tensorrt - uses: ./.github/workflows/windows-test.yml - with: - job-name: tests-py-core - repository: ${{ matrix.repository }} - ref: "" - test-infra-repository: pytorch/test-infra - test-infra-ref: main - build-matrix: ${{ needs.substitute-runner.outputs.matrix }} - pre-script: packaging/driver_upgrade.bat - use-rtx: true - script: | - set -euo pipefail - export USE_HOST_DEPS=1 - export CI_BUILD=1 - pushd . - cd tests/py/core - python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . - popd + # tests-py-core: + # name: Test core [Python] + # needs: [substitute-runner, build] + # strategy: + # fail-fast: false + # matrix: + # include: + # - repository: pytorch/tensorrt + # package-name: torch_tensorrt + # uses: ./.github/workflows/windows-test.yml + # with: + # job-name: tests-py-core + # repository: ${{ matrix.repository }} + # ref: "" + # test-infra-repository: pytorch/test-infra + # test-infra-ref: main + # build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + # pre-script: packaging/driver_upgrade.bat + # use-rtx: true + # script: | + # set -euo pipefail + # export USE_HOST_DEPS=1 + # export CI_BUILD=1 + # pushd . + # cd tests/py/core + # python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . + # popd concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index 8d8c51cbfc..c26fff2cc0 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -227,6 +227,7 @@ def compile( module_type = _parse_module_type(module) target_ir = _get_target_fe(module_type, ir) + print(f"lan added {target_ir=}") if target_ir == _IRType.ts: ts_mod = module if module_type == _ModuleType.nn: @@ -307,6 +308,7 @@ def _fx_input_interface( exp_program = dynamo_trace( module, torchtrt_arg_inputs, kwarg_inputs=torchtrt_kwarg_inputs, **kwargs ) + print(f"lan added {str(exp_program.graph)=}") trt_graph_module = dynamo_compile( exp_program, arg_inputs=torchtrt_arg_inputs, diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py index 0dc4654db0..a24aeea75f 100644 --- a/py/torch_tensorrt/dynamo/_compiler.py +++ b/py/torch_tensorrt/dynamo/_compiler.py @@ -5,6 +5,7 @@ import os import platform import warnings +from functools import total_ordering from typing import Any, Collection, List, Optional, Sequence, Set, Tuple, Union import torch @@ -809,6 +810,7 @@ def preserve_module_specs( # Partition module into components that can be TRT-accelerated fast_partitioner_failed = False + print(f"lan added {str(gm.graph)=}") # If specified, try using the fast partitioner and fall back to the global one on failure if settings.use_fast_partitioner: try: @@ -847,10 +849,14 @@ def preserve_module_specs( dryrun_tracker.to_run_in_torch.extend(parse_non_trt_nodes(partitioned_module)) submodule_node_dict = {} + print(f"lan added {list(partitioned_module.graph.nodes)=}") + print(f"lan added {total_ops=} {num_supported_ops=}") for node in partitioned_module.graph.nodes: if "_run_on_acc" not in node.name: + print(f"lan added skipped node{node.name=}") continue submodule_node_dict[node.name] = node + print(f"lan added added submodule{node.name=}") preserve_module_specs(original_in_spec, original_out_spec, partitioned_module) # Store TRT replicas of Torch subgraphs diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index 3d5e2190d2..96b9ac9002 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -19,73 +19,32 @@ import timm -@pytest.mark.unit @unittest.skipIf( - not importlib.util.find_spec("torchvision"), - "torchvision is not installed", -) -def test_resnet18(ir): - model = models.resnet18(pretrained=True).eval().to("cuda") - input = torch.randn((1, 3, 224, 224)).to("cuda") - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format - ) - ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.float}, - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "cache_built_engines": False, - "reuse_cached_engines": False, - } - - trt_mod = torchtrt.compile(model, **compile_spec) - cos_sim = cosine_similarity(model(input), trt_mod(input)) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -@unittest.skipIf( - not importlib.util.find_spec("torchvision"), - "torchvision is not installed", + not importlib.util.find_spec("torchvision"), "torchvision not installed" ) -def test_resnet18_cpu_offload(ir): +def test_lan_1_resnet18_torch_exec_ops(ir): model = models.resnet18(pretrained=True).eval().to("cuda") input = torch.randn((1, 3, 224, 224)).to("cuda") compile_spec = { "inputs": [ torchtrt.Input( - input.shape, dtype=torch.float, format=torch.contiguous_format + min_shape=(1, 3, 224, 224), + opt_shape=(8, 3, 224, 224), + max_shape=(16, 3, 224, 224), + dtype=torch.float32, ) ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.float}, "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "cache_built_engines": False, - "reuse_cached_engines": False, - "offload_module_to_cpu": True, + "enabled_precisions": {torch.float32}, + "min_block_size": 1, + "output_format": "exported_program", + "cache_built_engines": True, + "reuse_cached_engines": True, + "torch_executed_ops": {torch.ops.aten.matmul, "torch.ops.aten.add"}, } trt_mod = torchtrt.compile(model, **compile_spec) - if ir == "dynamo": - assertions.assertTrue( - get_model_device(model).type == "cpu", - msg="Model should be offloaded to CPU", - ) - model.cuda() cos_sim = cosine_similarity(model(input), trt_mod(input)) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, @@ -99,7 +58,7 @@ def test_resnet18_cpu_offload(ir): @unittest.skipIf( not importlib.util.find_spec("torchvision"), "torchvision not installed" ) -def test_resnet18_torch_exec_ops(ir): +def test_lan_2_resnet18_torch_exec_ops(ir): model = models.resnet18(pretrained=True).eval().to("cuda") input = torch.randn((1, 3, 224, 224)).to("cuda") @@ -132,335 +91,371 @@ def test_resnet18_torch_exec_ops(ir): torch._dynamo.reset() -@pytest.mark.unit -@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) @unittest.skipIf( - not importlib.util.find_spec("torchvision"), - "torchvision is not installed", -) -def test_mobilenet_v2(ir, dtype): - if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: - pytest.skip("TensorRT-RTX does not support bfloat16") - - model = models.mobilenet_v2(pretrained=True).eval().to("cuda").to(dtype) - input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) - - compile_spec = { - "inputs": [ - torchtrt.Input(input.shape, dtype=dtype, format=torch.contiguous_format) - ], - "device": torchtrt.Device("cuda:0"), - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "min_block_size": 10, - "cache_built_engines": False, - "reuse_cached_engines": False, - "use_explicit_typing": True, - } - - trt_mod = torchtrt.compile(model, **compile_spec) - pyt_output = model(input) - trt_output = trt_mod(input) - assert pyt_output.dtype == trt_output.dtype - assert pyt_output.dtype == dtype - cos_sim = cosine_similarity(pyt_output, trt_output) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) -@unittest.skipIf( - not importlib.util.find_spec("timm") or not importlib.util.find_spec("torchvision"), - "timm or torchvision not installed", -) -def test_efficientnet_b0(ir, dtype): - if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: - pytest.skip("TensorRT-RTX does not support bfloat16, skipping test") - - model = ( - timm.create_model("efficientnet_b0", pretrained=True) - .eval() - .to("cuda") - .to(dtype) - ) - input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) - - compile_spec = { - "inputs": [ - torchtrt.Input(input.shape, dtype=dtype, format=torch.contiguous_format) - ], - "device": torchtrt.Device("cuda:0"), - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "min_block_size": 10, - "cache_built_engines": False, - "reuse_cached_engines": False, - "use_explicit_typing": True, - } - - trt_mod = torchtrt.compile(model, **compile_spec) - pyt_output = model(input) - trt_output = trt_mod(input) - assert pyt_output.dtype == trt_output.dtype - assert pyt_output.dtype == dtype - cos_sim = cosine_similarity(pyt_output, trt_output) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) -@unittest.skipIf( - not importlib.util.find_spec("transformers"), - "transformers is required to run this test", -) -def test_bert_base_uncased(ir, dtype): - if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: - pytest.skip("TensorRT-RTX does not support bfloat16") - - from transformers import BertModel - - model = BertModel.from_pretrained("bert-base-uncased").cuda().eval().to(dtype) - input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") - input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, - dtype=input.dtype, - format=torch.contiguous_format, - ), - torchtrt.Input( - input.shape, - dtype=input.dtype, - format=torch.contiguous_format, - ), - ], - "device": torchtrt.Device("cuda:0"), - "truncate_double": True, - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "min_block_size": 15, - "cache_built_engines": False, - "reuse_cached_engines": False, - "use_explicit_typing": True, - } - trt_mod = torchtrt.compile(model, **compile_spec) - - model_outputs = model(input, input2) - trt_model_outputs = trt_mod(input, input2) - for key in model_outputs.keys(): - out, trt_out = model_outputs[key], trt_model_outputs[key] - assert out.dtype == trt_out.dtype - assert out.dtype == dtype - cos_sim = cosine_similarity(out, trt_out) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -def test_bert_base_uncased_cpu_offload(ir): - from transformers import BertModel - - model = BertModel.from_pretrained("bert-base-uncased").cuda().eval() - input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") - input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, - dtype=input.dtype, - format=torch.contiguous_format, - ), - torchtrt.Input( - input.shape, - dtype=input.dtype, - format=torch.contiguous_format, - ), - ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.float}, - "truncate_double": True, - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "min_block_size": 15, - "cache_built_engines": False, - "reuse_cached_engines": False, - "offload_module_to_cpu": True, - } - trt_mod = torchtrt.compile(model, **compile_spec) - if ir == "dynamo": - assertions.assertTrue( - get_model_device(model).type == "cpu", - msg="Model should be offloaded to CPU", - ) - model.cuda() - - model_outputs = model(input, input2) - trt_model_outputs = trt_mod(input, input2) - for key in model_outputs.keys(): - out, trt_out = model_outputs[key], trt_model_outputs[key] - cos_sim = cosine_similarity(out, trt_out) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -@unittest.skipIf( - not importlib.util.find_spec("torchvision"), - "torchvision is not installed", -) -def test_resnet18_half(ir): - model = models.resnet18(pretrained=True).eval().to("cuda").half() - input = torch.randn((1, 3, 224, 224)).to("cuda").half() - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.half, format=torch.contiguous_format - ) - ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.half}, - "ir": ir, - "pass_through_build_failures": True, - "optimization_level": 1, - "cache_built_engines": False, - "reuse_cached_engines": False, - } - - trt_mod = torchtrt.compile(model, **compile_spec) - cos_sim = cosine_similarity(model(input), trt_mod(input)) - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"Resnet18 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() - - -@pytest.mark.unit -@unittest.skipIf( - torchtrt.ENABLED_FEATURES.tensorrt_rtx, - "bf16 is not supported for tensorrt_rtx", + not importlib.util.find_spec("torchvision"), "torchvision not installed" ) -def test_bf16_model(ir): - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) - self.relu = torch.nn.ReLU() - - def forward(self, x): - out = self.conv(x) - out = self.relu(out) - return out - - model = MyModule().eval().cuda().to(torch.bfloat16) - input = torch.randn((1, 3, 224, 224)).to("cuda").to(torch.bfloat16) +def test_lan_3_resnet18_torch_exec_ops(ir): + model = models.resnet18(pretrained=True).eval().to("cuda") + input = torch.randn((1, 3, 224, 224)).to("cuda") compile_spec = { "inputs": [ torchtrt.Input( - input.shape, dtype=torch.bfloat16, format=torch.contiguous_format + min_shape=(1, 3, 224, 224), + opt_shape=(8, 3, 224, 224), + max_shape=(16, 3, 224, 224), + dtype=torch.float32, ) ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.float32}, "ir": ir, - "pass_through_build_failures": True, + "enabled_precisions": {torch.float32, torch.float16}, "min_block_size": 1, + "output_format": "exported_program", "cache_built_engines": False, "reuse_cached_engines": False, - "use_explicit_typing": True, + "torch_executed_ops": {torch.ops.aten.matmul, "torch.ops.aten.add"}, } trt_mod = torchtrt.compile(model, **compile_spec) cos_sim = cosine_similarity(model(input), trt_mod(input)) - assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"BF16 model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"Resnet18 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) # Clean up model env torch._dynamo.reset() -@pytest.mark.unit -@unittest.skipIf( - torchtrt.ENABLED_FEATURES.tensorrt_rtx, - "bf16 is not supported for tensorrt_rtx", -) -def test_bf16_fallback_model(ir): - class MyModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = torch.nn.Conv2d(3, 16, 3, padding=1, stride=1, bias=True) - self.relu = torch.nn.ReLU() - self.conv2 = torch.nn.Conv2d(16, 16, 3, padding=1, stride=1, bias=True) - - def forward(self, x): - out = self.conv(x) - out = self.relu(out) - out = self.conv2(out) - return out - - model = MyModule().eval().cuda().to(torch.bfloat16) - input = torch.randn((1, 3, 224, 224)).to("cuda").to(torch.bfloat16) - - compile_spec = { - "inputs": [ - torchtrt.Input( - input.shape, dtype=torch.bfloat16, format=torch.contiguous_format - ) - ], - "device": torchtrt.Device("cuda:0"), - "enabled_precisions": {torch.float32}, - "ir": ir, - "pass_through_build_failures": True, - "min_block_size": 1, - "cache_built_engines": False, - "reuse_cached_engines": False, - "use_explicit_typing": True, - "torch_executed_ops": {"torch.ops.aten.relu.default"}, - } - - trt_mod = torchtrt.compile(model, **compile_spec) - cos_sim = cosine_similarity(model(input), trt_mod(input)) - - assertions.assertTrue( - cos_sim > COSINE_THRESHOLD, - msg=f"BF16 fallback model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", - ) - - # Clean up model env - torch._dynamo.reset() +# @pytest.mark.unit +# @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) +# @unittest.skipIf( +# not importlib.util.find_spec("torchvision"), +# "torchvision is not installed", +# ) +# def test_lan_3_mobilenet_v2(ir, dtype): +# if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: +# pytest.skip("TensorRT-RTX does not support bfloat16") + +# model = models.mobilenet_v2(pretrained=True).eval().to("cuda").to(dtype) +# input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) + +# compile_spec = { +# "inputs": [ +# torchtrt.Input(input.shape, dtype=dtype, format=torch.contiguous_format) +# ], +# "device": torchtrt.Device("cuda:0"), +# "ir": ir, +# "pass_through_build_failures": True, +# "optimization_level": 1, +# "min_block_size": 10, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "use_explicit_typing": True, +# } + +# trt_mod = torchtrt.compile(model, **compile_spec) +# pyt_output = model(input) +# trt_output = trt_mod(input) +# assert pyt_output.dtype == trt_output.dtype +# assert pyt_output.dtype == dtype +# cos_sim = cosine_similarity(pyt_output, trt_output) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"Mobilenet v2 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) +# @unittest.skipIf( +# not importlib.util.find_spec("timm") or not importlib.util.find_spec("torchvision"), +# "timm or torchvision not installed", +# ) +# def test_lan_4_efficientnet_b0(ir, dtype): +# if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: +# pytest.skip("TensorRT-RTX does not support bfloat16, skipping test") + +# model = ( +# timm.create_model("efficientnet_b0", pretrained=True) +# .eval() +# .to("cuda") +# .to(dtype) +# ) +# input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) + +# compile_spec = { +# "inputs": [ +# torchtrt.Input(input.shape, dtype=dtype, format=torch.contiguous_format) +# ], +# "device": torchtrt.Device("cuda:0"), +# "ir": ir, +# "pass_through_build_failures": True, +# "optimization_level": 1, +# "min_block_size": 10, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "use_explicit_typing": True, +# } + +# trt_mod = torchtrt.compile(model, **compile_spec) +# pyt_output = model(input) +# trt_output = trt_mod(input) +# assert pyt_output.dtype == trt_output.dtype +# assert pyt_output.dtype == dtype +# cos_sim = cosine_similarity(pyt_output, trt_output) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"EfficientNet-B0 TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) +# @unittest.skipIf( +# not importlib.util.find_spec("transformers"), +# "transformers is required to run this test", +# ) +# def test_lan_5_bert_base_uncased(ir, dtype): +# if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16: +# pytest.skip("TensorRT-RTX does not support bfloat16") + +# from transformers import BertModel + +# model = BertModel.from_pretrained("bert-base-uncased").cuda().eval().to(dtype) +# input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") +# input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") + +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, +# dtype=input.dtype, +# format=torch.contiguous_format, +# ), +# torchtrt.Input( +# input.shape, +# dtype=input.dtype, +# format=torch.contiguous_format, +# ), +# ], +# "device": torchtrt.Device("cuda:0"), +# "truncate_double": True, +# "ir": ir, +# "pass_through_build_failures": True, +# "optimization_level": 1, +# "min_block_size": 15, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "use_explicit_typing": True, +# } +# trt_mod = torchtrt.compile(model, **compile_spec) + +# model_outputs = model(input, input2) +# trt_model_outputs = trt_mod(input, input2) +# for key in model_outputs.keys(): +# out, trt_out = model_outputs[key], trt_model_outputs[key] +# assert out.dtype == trt_out.dtype +# assert out.dtype == dtype +# cos_sim = cosine_similarity(out, trt_out) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# def test_lan_6_bert_base_uncased_cpu_offload(ir): +# from transformers import BertModel + +# model = BertModel.from_pretrained("bert-base-uncased").cuda().eval() +# input = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") +# input2 = torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda") + +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, +# dtype=input.dtype, +# format=torch.contiguous_format, +# ), +# torchtrt.Input( +# input.shape, +# dtype=input.dtype, +# format=torch.contiguous_format, +# ), +# ], +# "device": torchtrt.Device("cuda:0"), +# "enabled_precisions": {torch.float}, +# "truncate_double": True, +# "ir": ir, +# "pass_through_build_failures": True, +# "optimization_level": 1, +# "min_block_size": 15, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "offload_module_to_cpu": True, +# } +# trt_mod = torchtrt.compile(model, **compile_spec) +# if ir == "dynamo": +# assertions.assertTrue( +# get_model_device(model).type == "cpu", +# msg="Model should be offloaded to CPU", +# ) +# model.cuda() + +# model_outputs = model(input, input2) +# trt_model_outputs = trt_mod(input, input2) +# for key in model_outputs.keys(): +# out, trt_out = model_outputs[key], trt_model_outputs[key] +# cos_sim = cosine_similarity(out, trt_out) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"HF BERT base-uncased TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# @unittest.skipIf( +# not importlib.util.find_spec("torchvision"), +# "torchvision is not installed", +# ) +# def test_lan_7_resnet18_half(ir): +# model = models.resnet18(pretrained=True).eval().to("cuda").half() +# input = torch.randn((1, 3, 224, 224)).to("cuda").half() + +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.half, format=torch.contiguous_format +# ) +# ], +# "device": torchtrt.Device("cuda:0"), +# "enabled_precisions": {torch.half}, +# "ir": ir, +# "pass_through_build_failures": True, +# "optimization_level": 1, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# } + +# trt_mod = torchtrt.compile(model, **compile_spec) +# cos_sim = cosine_similarity(model(input), trt_mod(input)) +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"Resnet18 Half TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# @unittest.skipIf( +# torchtrt.ENABLED_FEATURES.tensorrt_rtx, +# "bf16 is not supported for tensorrt_rtx", +# ) +# def test_lan_8_bf16_model(ir): +# class MyModule(torch.nn.Module): +# def __init__(self): +# super().__init__() +# self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True) +# self.relu = torch.nn.ReLU() + +# def forward(self, x): +# out = self.conv(x) +# out = self.relu(out) +# return out + +# model = MyModule().eval().cuda().to(torch.bfloat16) +# input = torch.randn((1, 3, 224, 224)).to("cuda").to(torch.bfloat16) + +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.bfloat16, format=torch.contiguous_format +# ) +# ], +# "device": torchtrt.Device("cuda:0"), +# "enabled_precisions": {torch.float32}, +# "ir": ir, +# "pass_through_build_failures": True, +# "min_block_size": 1, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "use_explicit_typing": True, +# } + +# trt_mod = torchtrt.compile(model, **compile_spec) +# cos_sim = cosine_similarity(model(input), trt_mod(input)) + +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"BF16 model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset() + + +# @pytest.mark.unit +# @unittest.skipIf( +# torchtrt.ENABLED_FEATURES.tensorrt_rtx, +# "bf16 is not supported for tensorrt_rtx", +# ) +# def test_lan_9_bf16_fallback_model(ir): +# class MyModule(torch.nn.Module): +# def __init__(self): +# super().__init__() +# self.conv = torch.nn.Conv2d(3, 16, 3, padding=1, stride=1, bias=True) +# self.relu = torch.nn.ReLU() +# self.conv2 = torch.nn.Conv2d(16, 16, 3, padding=1, stride=1, bias=True) + +# def forward(self, x): +# out = self.conv(x) +# out = self.relu(out) +# out = self.conv2(out) +# return out + +# model = MyModule().eval().cuda().to(torch.bfloat16) +# input = torch.randn((1, 3, 224, 224)).to("cuda").to(torch.bfloat16) + +# compile_spec = { +# "inputs": [ +# torchtrt.Input( +# input.shape, dtype=torch.bfloat16, format=torch.contiguous_format +# ) +# ], +# "device": torchtrt.Device("cuda:0"), +# "enabled_precisions": {torch.float32}, +# "ir": ir, +# "pass_through_build_failures": True, +# "min_block_size": 1, +# "cache_built_engines": False, +# "reuse_cached_engines": False, +# "use_explicit_typing": True, +# "torch_executed_ops": {"torch.ops.aten.relu.default"}, +# } + +# trt_mod = torchtrt.compile(model, **compile_spec) +# cos_sim = cosine_similarity(model(input), trt_mod(input)) + +# assertions.assertTrue( +# cos_sim > COSINE_THRESHOLD, +# msg=f"BF16 fallback model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", +# ) + +# # Clean up model env +# torch._dynamo.reset()