Skip to content

Commit 7d01bf2

Browse files
Merge branch 'pytorch:main' into temp-gha-runner-v3
2 parents 0ca5ca7 + 33a1996 commit 7d01bf2

File tree

268 files changed

+8279
-7668
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

268 files changed

+8279
-7668
lines changed

.ci/docker/common/install_triton.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,5 +103,5 @@ fi
103103
# It depends on torch and triton. We don't want to install
104104
# triton and torch from production on Docker CI images
105105
if [[ "$ANACONDA_PYTHON_VERSION" != 3.9* ]]; then
106-
pip_install helion==0.0.10 --no-deps
106+
pip_install helion --no-deps
107107
fi

.ci/docker/requirements-docs.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
sphinx==5.3.0
22
#Description: This is used to generate PyTorch docs
33
#Pinned versions: 5.3.0
4-
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
4+
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
55

66
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
77
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
@@ -50,7 +50,7 @@ IPython==8.12.0
5050
#Pinned versions: 8.12.0
5151

5252
myst-nb==0.17.2
53-
#Description: This is used to generate PyTorch functorch and torch.compile docs
53+
#Description: This is used to generate PyTorch functorch and torch.compile docs.
5454
#Pinned versions: 0.17.2
5555

5656
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs

.ci/manywheel/build_rocm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
194194
ROCBLAS_LIB_DST=lib/rocblas/library
195195
ROCBLAS_ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
196196
ROCBLAS_OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
197-
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $OTHER_FILES)
197+
ROCBLAS_LIB_FILES=($ROCBLAS_ARCH_SPECIFIC_FILES $ROCBLAS_OTHER_FILES)
198198

199199
# hipblaslt library files
200200
HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library

.ci/pytorch/test.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,8 @@ test_perf_for_dashboard() {
627627
device=cuda_a10g
628628
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
629629
device=cuda_h100
630+
elif [[ "${TEST_CONFIG}" == *b200* ]]; then
631+
device=cuda_b200
630632
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
631633
device=rocm
632634
fi
@@ -801,6 +803,16 @@ test_dynamo_benchmark() {
801803
if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
802804
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
803805
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
806+
# TODO (huydhn): Just smoke test some sample models
807+
if [[ "${TEST_CONFIG}" == *b200* ]]; then
808+
if [[ "${suite}" == "huggingface" ]]; then
809+
export TORCHBENCH_ONLY_MODELS="DistillGPT2"
810+
elif [[ "${suite}" == "timm_models" ]]; then
811+
export TORCHBENCH_ONLY_MODELS="inception_v3"
812+
elif [[ "${suite}" == "torchbench" ]]; then
813+
export TORCHBENCH_ONLY_MODELS="hf_Bert"
814+
fi
815+
fi
804816
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
805817
else
806818
if [[ "${TEST_CONFIG}" == *cpu* ]]; then

.github/ci_commit_pins/audio.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
bf305f538005f2e900f8850ed57146024a8bc559
1+
9b57c7bd5ad4db093c5bb31c802df9f04d933ac9

.github/ci_commit_pins/vllm.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ca9e2be3ed6320b51f52f536595cd24e254f8bb2
1+
6a39ba85fe0f2fff9494b5eccea717c93510c230

.github/merge_rules.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,10 @@
488488
- torch/_dynamo/**
489489
- torch/csrc/dynamo/**
490490
- test/dynamo/**
491+
- test/dynamo_expected_failures/**
492+
- test/dynamo_skips/**
493+
- test/inductor_expected_failures/**
494+
- test/inductor_skips/**
491495
approved_by:
492496
- guilhermeleobas
493497
mandatory_checks_name:

.github/workflows/_linux-test.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ jobs:
9696
steps:
9797
- name: Setup SSH (Click me for login details)
9898
uses: pytorch/test-infra/.github/actions/setup-ssh@main
99-
if: ${{ matrix.runner != 'B200' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
99+
if: ${{ !contains(matrix.runner, 'b200') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
100100
with:
101101
github-secret: ${{ secrets.GITHUB_TOKEN }}
102102
instructions: |
@@ -109,15 +109,15 @@ jobs:
109109
no-sudo: true
110110

111111
- name: Setup Python
112-
if: matrix.runner == 'B200'
112+
if: contains(matrix.runner, 'b200')
113113
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
114114
with:
115115
python-version: '3.12'
116116
cache: pip
117117

118118
- name: Setup Linux
119119
uses: ./.github/actions/setup-linux
120-
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && matrix.runner != 'B200'
120+
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && !contains(matrix.runner, 'b200')
121121

122122
- name: configure aws credentials
123123
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
@@ -128,7 +128,7 @@ jobs:
128128
aws-region: us-east-1
129129

130130
- name: Login to Amazon ECR
131-
if: ${{ inputs.aws-role-to-assume != '' && matrix.runner == 'B200' }}
131+
if: ${{ inputs.aws-role-to-assume != '' && contains(matrix.runner, 'b200') }}
132132
id: login-ecr
133133
continue-on-error: true
134134
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
@@ -166,17 +166,17 @@ jobs:
166166
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
167167
with:
168168
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '570.133.07' }}
169-
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && matrix.runner != 'B200' }}
169+
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
170170

171171
- name: Setup GPU_FLAG for docker run
172172
id: setup-gpu-flag
173173
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
174-
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || matrix.runner == 'B200') }}
174+
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || contains(matrix.runner, 'b200')) }}
175175

176176
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
177177
id: setup-sscache-port-flag
178178
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
179-
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && matrix.runner != 'B200' }}
179+
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && !contains(matrix.runner, 'b200') }}
180180

181181
- name: Lock NVIDIA A100 40GB Frequency
182182
run: |
@@ -277,8 +277,8 @@ jobs:
277277
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
278278
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
279279
# Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
280-
SCCACHE_BUCKET: ${{ matrix.runner != 'B200' && 'ossci-compiler-cache-circleci-v2' || '' }}
281-
SCCACHE_REGION: ${{ matrix.runner != 'B200' && 'us-east-1' || '' }}
280+
SCCACHE_BUCKET: ${{ !contains(matrix.runner, 'b200') && 'ossci-compiler-cache-circleci-v2' || '' }}
281+
SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
282282
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
283283
DOCKER_IMAGE: ${{ inputs.docker-image }}
284284
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
@@ -403,7 +403,7 @@ jobs:
403403
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
404404

405405
- name: Authenticate with AWS
406-
if: ${{ matrix.runner == 'B200' }}
406+
if: ${{ contains(matrix.runner, 'b200') }}
407407
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
408408
with:
409409
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results

.github/workflows/docker-builds.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ jobs:
7676
pytorch-linux-jammy-py3-clang12-onnx,
7777
pytorch-linux-jammy-linter,
7878
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter,
79-
pytorch-linux-jammy-py3-clang12-executorch,
79+
# Executorch pin needs update
80+
# pytorch-linux-jammy-py3-clang12-executorch,
8081
pytorch-linux-jammy-py3.12-triton-cpu
8182
]
8283
include:
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
name: inductor-perf-b200
2+
3+
on:
4+
schedule:
5+
- cron: 0 7 * * 1-6
6+
- cron: 0 7 * * 0
7+
# NB: GitHub has an upper limit of 10 inputs here, so before we can sort it
8+
# out, let's try to run torchao cudagraphs_low_precision as part of cudagraphs
9+
workflow_dispatch:
10+
inputs:
11+
training:
12+
description: Run training (on by default)?
13+
required: false
14+
type: boolean
15+
default: true
16+
inference:
17+
description: Run inference (on by default)?
18+
required: false
19+
type: boolean
20+
default: true
21+
default:
22+
description: Run inductor_default?
23+
required: false
24+
type: boolean
25+
default: false
26+
dynamic:
27+
description: Run inductor_dynamic_shapes?
28+
required: false
29+
type: boolean
30+
default: false
31+
cppwrapper:
32+
description: Run inductor_cpp_wrapper?
33+
required: false
34+
type: boolean
35+
default: false
36+
cudagraphs:
37+
description: Run inductor_cudagraphs?
38+
required: false
39+
type: boolean
40+
default: true
41+
freezing_cudagraphs:
42+
description: Run inductor_cudagraphs with freezing for inference?
43+
required: false
44+
type: boolean
45+
default: false
46+
aotinductor:
47+
description: Run aot_inductor for inference?
48+
required: false
49+
type: boolean
50+
default: false
51+
maxautotune:
52+
description: Run inductor_max_autotune?
53+
required: false
54+
type: boolean
55+
default: false
56+
benchmark_configs:
57+
description: The list of configs used the benchmark
58+
required: false
59+
type: string
60+
default: inductor_huggingface_perf_cuda_b200,inductor_timm_perf_cuda_b200,inductor_torchbench_perf_cuda_b200
61+
62+
concurrency:
63+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
64+
cancel-in-progress: true
65+
66+
permissions:
67+
id-token: write
68+
contents: read
69+
70+
jobs:
71+
get-label-type:
72+
name: get-label-type
73+
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
74+
if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }}
75+
with:
76+
triggering_actor: ${{ github.triggering_actor }}
77+
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
78+
curr_branch: ${{ github.head_ref || github.ref_name }}
79+
curr_ref_type: ${{ github.ref_type }}
80+
opt_out_experiments: lf
81+
82+
build:
83+
name: cuda12.8-py3.10-gcc9-sm100
84+
uses: ./.github/workflows/_linux-build.yml
85+
needs: get-label-type
86+
with:
87+
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
88+
# Use a bigger runner here because CUDA_ARCH 10.0 is only built for B200
89+
# or newer GPUs, so it doesn't benefit much from existing compiler cache
90+
# from trunk. Also use a memory-intensive runner here because memory is
91+
# usually the bottleneck
92+
runner: linux.12xlarge.memory
93+
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
94+
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
95+
cuda-arch-list: '10.0'
96+
test-matrix: |
97+
{ include: [
98+
{ config: "inductor_huggingface_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
99+
{ config: "inductor_timm_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
100+
{ config: "inductor_torchbench_perf_cuda_b200", shard: 1, num_shards: 1, runner: "linux.dgx.b200" },
101+
]}
102+
selected-test-configs: ${{ inputs.benchmark_configs }}
103+
build-additional-packages: "vision audio fbgemm torchao"
104+
secrets: inherit
105+
106+
test-periodically:
107+
name: cuda12.8-py3.10-gcc9-sm100
108+
uses: ./.github/workflows/_linux-test.yml
109+
needs: build
110+
if: github.event.schedule == '0 7 * * 1-6'
111+
with:
112+
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
113+
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-cudagraphs_low_precision-true
114+
docker-image: ${{ needs.build.outputs.docker-image }}
115+
test-matrix: ${{ needs.build.outputs.test-matrix }}
116+
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
117+
timeout-minutes: 720
118+
disable-monitor: false
119+
monitor-log-interval: 15
120+
monitor-data-collect-interval: 4
121+
secrets: inherit
122+
123+
test-weekly:
124+
name: cuda12.8-py3.10-gcc9-sm100
125+
uses: ./.github/workflows/_linux-test.yml
126+
needs: build
127+
if: github.event.schedule == '0 7 * * 0'
128+
with:
129+
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
130+
dashboard-tag: training-true-inference-true-default-true-dynamic-true-cudagraphs-true-cppwrapper-true-aotinductor-true-freezing_cudagraphs-true-maxautotune-true-freeze_autotune_cudagraphs-true-cudagraphs_low_precision-true
131+
docker-image: ${{ needs.build.outputs.docker-image }}
132+
test-matrix: ${{ needs.build.outputs.test-matrix }}
133+
timeout-minutes: 1440
134+
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
135+
disable-monitor: false
136+
monitor-log-interval: 15
137+
monitor-data-collect-interval: 4
138+
secrets: inherit
139+
140+
test:
141+
name: cuda12.8-py3.10-gcc9-sm100
142+
uses: ./.github/workflows/_linux-test.yml
143+
needs: build
144+
with:
145+
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm100
146+
dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
147+
docker-image: ${{ needs.build.outputs.docker-image }}
148+
test-matrix: ${{ needs.build.outputs.test-matrix }}
149+
aws-role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
150+
timeout-minutes: 720
151+
disable-monitor: false
152+
monitor-log-interval: 15
153+
monitor-data-collect-interval: 4
154+
secrets: inherit

0 commit comments

Comments
 (0)