-
Notifications
You must be signed in to change notification settings - Fork 372
Add tensorrt test workflow #3266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
a0b1989
add test tensorrt workflow
lanluo-nvidia 09689ee
test
lanluo-nvidia 3206da7
test
lanluo-nvidia 0a64986
test
lanluo-nvidia a02d944
test
lanluo-nvidia 9c0ca36
test
lanluo-nvidia 67cbaf3
test
lanluo-nvidia 7765146
add some test
lanluo-nvidia 8a199aa
test
lanluo-nvidia 546a574
test
lanluo-nvidia 41aec8b
test
lanluo-nvidia a554285
test
lanluo-nvidia 7102fa5
test
lanluo-nvidia 0fd94e6
test
lanluo-nvidia 6cc2faa
resolve comments
lanluo-nvidia e9af038
add more tests
lanluo-nvidia 646a515
Merge branch 'main' into lluo/tensorrt_test_workflow
lanluo-nvidia 1bf5673
merge main into the branch
lanluo-nvidia 1f92a78
add comments
lanluo-nvidia 8fc3482
change decomposition default table due to upstream torch change
lanluo-nvidia 229bdf1
Merge branch 'lluo/decomposition_upstream_change' into lluo/tensorrt_…
lanluo-nvidia 27d068e
Merge branch 'main' into lluo/tensorrt_test_workflow
lanluo-nvidia ef87ca2
test
lanluo-nvidia e68ef16
test
lanluo-nvidia File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import copy | ||
import json | ||
import sys | ||
|
||
# Allowed "desired_cuda" values per release channel. main() keeps a matrix
# entry only when its "desired_cuda" is listed for the matrix's channel.
CUDA_VERSIONS_DICT = {
    "nightly": ["cu124"],
    "test": ["cu121", "cu124"],
    "release": ["cu121", "cu124"],
}

# Allowed "python_version" values per release channel. main() keeps a matrix
# entry only when its "python_version" is listed for the matrix's channel.
PYTHON_VERSIONS_DICT = {
    "nightly": ["3.9"],
    "test": ["3.9", "3.10", "3.11", "3.12"],
    "release": ["3.9", "3.10", "3.11", "3.12"],
}

# TensorRT packages keyed by platform ("windows" / "linux") and then by
# TensorRT version. Each entry carries:
#   "urls"         - a single download URL string (not a list, despite the name)
#   "strip_prefix" - presumably the top-level directory inside the archive
#   "sha256"       - presumably the SHA-256 digest of the archive; verify
#                    against the consumer of TENSORRT_SHA256
# main() attaches one of these entries (plus a "version" key) to every emitted
# matrix item under the "tensorrt" key.
TENSORRT_VERSIONS_DICT = {
    "windows": {
        "10.4.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.4.0.26",
            "sha256": "3a7de83778b9e9f812fd8901e07e0d7d6fc54ce633fcff2e340f994df2c6356c",
        },
        "10.5.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.5.0.18",
            "sha256": "e6436f4164db4e44d727354dccf7d93755efb70d6fbfd6fa95bdfeb2e7331b24",
        },
        "10.6.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip",
            "strip_prefix": "TensorRT-10.6.0.26",
            "sha256": "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254",
        },
    },
    "linux": {
        "10.4.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.4.0.26",
            "sha256": "cb0273ecb3ba4db8993a408eedd354712301a6c7f20704c52cdf9f78aa97bbdb",
        },
        "10.5.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.5.0.18",
            "sha256": "f404d379d639552a3e026cd5267213bd6df18a4eb899d6e47815bbdb34854958",
        },
        "10.6.0": {
            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
            "strip_prefix": "TensorRT-10.6.0.26",
            "sha256": "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0",
        },
    },
}
|
||
|
||
def main(args: list[str]) -> None:
    """Filter a build matrix and fan it out over the known TensorRT versions.

    Parses ``--matrix`` (a JSON string with an ``"include"`` list), reads the
    channel and platform from the first entry, keeps the entries whose
    ``desired_cuda`` / ``python_version`` are allowed for that channel, and
    emits one copy of each kept entry per TensorRT version of the platform.
    The resulting ``{"include": [...]}`` object is printed to stdout as JSON.

    Args:
        args: Command-line arguments without the program name.

    Raises:
        Exception: If ``--matrix`` is empty, the ``channel`` or
            ``validation_runner`` field is missing from the first entry, the
            channel is not nightly/test/release, or the runner is neither a
            windows nor a linux runner.
        AssertionError: If the ``include`` list is empty.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--matrix",
        help="matrix",
        type=str,
        default="",
    )

    options = parser.parse_args(args)
    if options.matrix == "":
        raise Exception("--matrix is empty, please provide the matrix json str")

    matrix_dict = json.loads(options.matrix)
    includes = matrix_dict["include"]
    assert len(includes) > 0, "the matrix include list is empty"

    # Channel and platform are taken from the first entry only and applied to
    # the whole matrix.
    first = includes[0]
    if "channel" not in first:
        raise Exception(f"channel field is missing from the matrix: {options.matrix}")
    channel = first["channel"]
    if channel not in ("nightly", "test", "release"):
        raise Exception(
            f"channel field: {channel} is not supported, currently supported value: nightly, test, release"
        )

    if "validation_runner" not in first:
        raise Exception(
            f"validation_runner field is missing from the matrix: {options.matrix}"
        )
    validation_runner = first["validation_runner"]
    if "windows" in validation_runner:
        arch = "windows"
    elif "linux" in validation_runner:
        arch = "linux"
    else:
        # The entry is a dict, so the runner name must be read by key;
        # attribute access here would raise AttributeError and hide this
        # message.
        raise Exception(
            f"{validation_runner} is not the supported arch, currently only support windows and linux"
        )

    cuda_versions = CUDA_VERSIONS_DICT[channel]
    python_versions = PYTHON_VERSIONS_DICT[channel]
    tensorrt_versions = TENSORRT_VERSIONS_DICT[arch]

    filtered_includes = []
    for item in includes:
        if (
            item["desired_cuda"] in cuda_versions
            and item["python_version"] in python_versions
        ):
            for tensorrt_version, tensorrt_json in tensorrt_versions.items():
                new_item = copy.deepcopy(item)
                # Build a fresh dict per item so the module-level table is
                # never mutated and output items do not alias each other.
                new_item["tensorrt"] = {**tensorrt_json, "version": tensorrt_version}
                filtered_includes.append(new_item)

    print(json.dumps({"include": filtered_includes}))


if __name__ == "__main__":
    main(sys.argv[1:])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
# Reusable workflow (workflow_call): for every entry of the supplied build
# matrix it builds the wheel with `python setup.py bdist_wheel` inside the
# matrix's container image and uploads `<repository>/dist` as an artifact.
# The TensorRT package to build against comes from `matrix.tensorrt.*` and is
# exported through the TENSORRT_* environment variables.
name: Build Torch-TensorRT wheel on Linux with specified tensorRT version

on:
  workflow_call:
    inputs:
      repository:
        description: 'Repository to checkout, defaults to ""'
        default: ""
        type: string
      ref:
        description: 'Reference to checkout, defaults to "nightly"'
        default: "nightly"
        type: string
      test-infra-repository:
        description: "Test infra repository to use"
        default: "pytorch/test-infra"
        type: string
      test-infra-ref:
        description: "Test infra reference to use"
        default: ""
        type: string
      build-matrix:
        description: "Build matrix to utilize"
        default: ""
        type: string
      pre-script:
        description: "Pre script to run prior to build"
        default: ""
        type: string
      post-script:
        description: "Post script to run after build"
        default: ""
        type: string
      smoke-test-script:
        description: "Script for Smoke Test for a specific domain"
        default: ""
        type: string
      env-var-script:
        description: "Script that sets Domain-Specific Environment Variables"
        default: ""
        type: string
      package-name:
        description: "Name of the actual python package that is imported"
        default: ""
        type: string
      trigger-event:
        description: "Trigger Event in caller that determines whether or not to upload"
        default: ""
        type: string
      cache-path:
        description: "The path(s) on the runner to cache or restore. The path is relative to repository."
        default: ""
        type: string
      cache-key:
        description: "The key created when saving a cache and the key used to search for a cache."
        default: ""
        type: string
      architecture:
        description: Architecture to build for x86_64 for default Linux, or aarch64 for Linux aarch64 builds
        required: false
        type: string
        default: x86_64
      submodules:
        description: Works as stated in actions/checkout, but the default value is recursive
        required: false
        type: string
        default: recursive
      setup-miniconda:
        description: Set to true if setup-miniconda is needed
        required: false
        type: boolean
        default: true

permissions:
  id-token: write
  contents: read

jobs:
  build:
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(inputs.build-matrix) }}
    env:
      PYTHON_VERSION: ${{ matrix.python_version }}
      PACKAGE_TYPE: wheel
      REPOSITORY: ${{ inputs.repository }}
      REF: ${{ inputs.ref }}
      CU_VERSION: ${{ matrix.desired_cuda }}
      UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }}
      ARCH: ${{ inputs.architecture }}
      TENSORRT_STRIP_PREFIX: ${{ matrix.tensorrt.strip_prefix }}
      TENSORRT_VERSION: ${{ matrix.tensorrt.version }}
      TENSORRT_URLS: ${{ matrix.tensorrt.urls }}
      TENSORRT_SHA256: ${{ matrix.tensorrt.sha256 }}
      UPLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }}
    name: build_tensorrt${{ matrix.tensorrt.version }}_py${{matrix.python_version}}_${{matrix.desired_cuda}}
    runs-on: ${{ matrix.validation_runner }}
    container:
      image: ${{ matrix.container_image }}
      options: ${{ matrix.gpu_arch_type == 'cuda' && '--gpus all' || ' ' }}
    # If a build is taking longer than 120 minutes on these runners we need
    # to have a conversation
    timeout-minutes: 120

    steps:
      - name: Clean workspace
        shell: bash -l {0}
        run: |
          set -x
          echo "::group::Cleanup debug output"
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir -p "${GITHUB_WORKSPACE}"
          if [[ "${{ inputs.architecture }}" = "aarch64" ]]; then
            # The glob must stay outside the quotes or it is never expanded;
            # ":?" aborts instead of expanding to "/*" if RUNNER_TEMP is unset.
            rm -rf "${RUNNER_TEMP:?}"/*
          fi
          echo "::endgroup::"
      - uses: actions/checkout@v3
        with:
          # Support the use case where we need to checkout someone's fork
          repository: ${{ inputs.test-infra-repository }}
          ref: ${{ inputs.test-infra-ref }}
          path: test-infra
      - uses: actions/checkout@v3
        if: ${{ env.ARCH == 'aarch64' }}
        with:
          # Support the use case where we need to checkout someone's fork
          repository: "pytorch/builder"
          ref: "main"
          path: builder
      - name: Set linux aarch64 CI
        if: ${{ inputs.architecture == 'aarch64' }}
        shell: bash -l {0}
        env:
          DESIRED_PYTHON: ${{ matrix.python_version }}
        run: |
          set +e
          # TODO: This is temporary aarch64 setup script, this should be integrated into aarch64 docker.
          ${GITHUB_WORKSPACE}/builder/aarch64_linux/aarch64_ci_setup.sh
          echo "/opt/conda/bin" >> $GITHUB_PATH
          set -e
      - uses: ./test-infra/.github/actions/set-channel
      - name: Set PYTORCH_VERSION
        if: ${{ env.CHANNEL == 'test' }}
        run: |
          # When building RC, set the version to be the current candidate version,
          # otherwise, leave it alone so nightly will pick up the latest
          echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}"
      - uses: ./test-infra/.github/actions/setup-binary-builds
        env:
          PLATFORM: ${{ inputs.architecture == 'aarch64' && 'linux-aarch64' || ''}}
        with:
          repository: ${{ inputs.repository }}
          ref: ${{ inputs.ref }}
          submodules: ${{ inputs.submodules }}
          setup-miniconda: ${{ inputs.setup-miniconda }}
          python-version: ${{ env.PYTHON_VERSION }}
          cuda-version: ${{ env.CU_VERSION }}
          arch: ${{ env.ARCH }}
      - name: Combine Env Var and Build Env Files
        if: ${{ inputs.env-var-script != '' }}
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          cat "${{ inputs.env-var-script }}" >> "${BUILD_ENV_FILE}"
      - name: Install torch dependency
        shell: bash -l {0}
        run: |
          set -x
          # shellcheck disable=SC1090
          source "${BUILD_ENV_FILE}"
          # shellcheck disable=SC2086
          ${CONDA_RUN} ${PIP_INSTALL_TORCH}
      - name: Run Pre-Script with Caching
        if: ${{ inputs.pre-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          cache-path: ${{ inputs.cache-path }}
          cache-key: ${{ inputs.cache-key }}
          repository: ${{ inputs.repository }}
          script: ${{ inputs.pre-script }}
      - name: Build clean
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py clean
      - name: Build the wheel (bdist_wheel)
        working-directory: ${{ inputs.repository }}
        shell: bash -l {0}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          ${CONDA_RUN} python setup.py bdist_wheel

      - name: Run Post-Script
        if: ${{ inputs.post-script != '' }}
        uses: ./test-infra/.github/actions/run-script-with-cache
        with:
          repository: ${{ inputs.repository }}
          script: ${{ inputs.post-script }}
      - name: Smoke Test
        shell: bash -l {0}
        env:
          PACKAGE_NAME: ${{ inputs.package-name }}
          SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }}
        run: |
          set -x
          source "${BUILD_ENV_FILE}"
          # TODO: add smoke test for the auditwheel tarball built

      # NB: Only upload to GitHub after passing smoke tests
      - name: Upload wheel to GitHub
        continue-on-error: true
        uses: actions/upload-artifact@v3
        with:
          name: ${{ env.UPLOAD_ARTIFACT_NAME }}
          path: ${{ inputs.repository }}/dist

# NOTE(review): `inputs.job-name` is not declared under `on.workflow_call.inputs`
# above, so it presumably evaluates to an empty string here — confirm whether
# it should be declared or dropped from the group key.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
  cancel-in-progress: true
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.