Skip to content

Commit 3d89512

Browse files
committed
Update on "Fix Cuda out of memory issue for eager runner"
This PR updates the eager runner to disable grad and reduce memory usage. It also updates the prompt format so that it does not include the BOS token. Differential Revision: [D65962743](https://our.internmc.facebook.com/intern/diff/D65962743/) [ghstack-poisoned]
2 parents 310f1a1 + ad5fcc9 commit 3d89512

File tree

111 files changed

+827608
-570
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+827608
-570
lines changed

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"resnet50": "linux.12xlarge",
2626
"llava": "linux.12xlarge",
2727
"llama3_2_vision_encoder": "linux.12xlarge",
28+
"llama3_2_text_decoder": "linux.12xlarge",
2829
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2930
"dl3": "linux.12xlarge",
3031
"emformer_join": "linux.12xlarge",

.ci/scripts/test_model.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ test_model() {
8787
bash examples/models/llava/install_requirements.sh
8888
STRICT="--no-strict"
8989
fi
90+
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" || "$MODEL_NAME" == "llama3_2_text_decoder" ]]; then
91+
# Install requirements for llama vision.
92+
bash examples/models/llama3_2_vision/install_requirements.sh
93+
fi
9094
# python3 -m examples.portable.scripts.export --model_name="llama2" should work too
9195
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
9296
run_portable_executor_runner

.github/workflows/apple.yml

Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,26 @@ on:
2020
- extension/benchmark/apple/**
2121
- extension/module/**
2222
workflow_dispatch:
23+
schedule:
24+
- cron: '0 10 * * *' # Runs daily at 10:00 UTC (2 AM PST / 3 AM PDT)
2325

2426
concurrency:
2527
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
2628
cancel-in-progress: true
2729

2830
jobs:
31+
set-version:
32+
runs-on: ubuntu-22.04
33+
outputs:
34+
version: ${{ steps.set_version.outputs.version }}
35+
steps:
36+
- name: Set VERSION variable
37+
id: set_version
38+
shell: bash
39+
run: |
40+
VERSION="0.4.0.$(TZ='PST8PDT' date +%Y%m%d)"
41+
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
42+
2943
build-demo-ios:
3044
name: build-demo-ios
3145
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -39,6 +53,8 @@ jobs:
3953
secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_DEMO_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
4054
upload-artifact: ios-apps
4155
script: |
56+
set -eux
57+
4258
BUILD_TOOL=cmake
4359
4460
.ci/scripts/setup-conda.sh
@@ -57,7 +73,7 @@ jobs:
5773
5874
# Build and test iOS Demo App
5975
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
60-
build/test_ios_ci.sh ${ARTIFACTS_DIR_NAME}
76+
build/test_ios_ci.sh "${ARTIFACTS_DIR_NAME}"
6177
6278
# Upload the test demo app to S3
6379
upload-demo-ios:
@@ -75,6 +91,7 @@ jobs:
7591
shell: bash
7692
working-directory: ${{ runner.temp }}/artifacts/
7793
run: |
94+
set -eux
7895
ls -lah ./
7996
8097
- name: Upload the artifacts to S3
@@ -112,6 +129,7 @@ jobs:
112129

113130
build-frameworks-ios:
114131
name: build-frameworks-ios
132+
needs: set-version
115133
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
116134
with:
117135
runner: macos-latest-xlarge
@@ -121,8 +139,10 @@ jobs:
121139
upload-artifact: executorch-frameworks-ios
122140
timeout: 90
123141
script: |
142+
set -eux
143+
124144
BUILD_TOOL=cmake
125-
VERSION="latest"
145+
VERSION="${{ needs.set-version.outputs.version }}"
126146
FRAMEWORKS=(
127147
"executorch"
128148
"backend_coreml"
@@ -171,13 +191,17 @@ jobs:
171191
172192
upload-frameworks-ios:
173193
runs-on: ubuntu-22.04
174-
needs: build-frameworks-ios
194+
needs: [build-frameworks-ios, set-version]
175195
timeout-minutes: 30
196+
environment: ${{ github.ref == 'refs/heads/main' && 'cherry-pick-bot' || '' }}
176197
permissions:
177198
id-token: write
178-
contents: read
199+
contents: write
179200
steps:
180201
- uses: actions/checkout@v3
202+
with:
203+
fetch-depth: 0
204+
token: ${{ secrets.GH_PYTORCHBOT_CHERRY_PICK_TOKEN || secrets.GITHUB_TOKEN }}
181205
- uses: actions/setup-python@v4
182206
with:
183207
python-version: '3.11'
@@ -194,15 +218,15 @@ jobs:
194218
name: executorch-frameworks-ios
195219
path: ${{ runner.temp }}/frameworks-ios/
196220
- name: Only push to S3 when running the workflow manually from main branch
197-
if: ${{ github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main' }}
221+
if: ${{ (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/main' }}
198222
shell: bash
199223
run: |
200-
set -eux
201224
echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"
202225
- name: Upload the artifact to ossci-ios S3 bucket
203226
shell: bash
204227
run: |
205228
set -eux
229+
VERSION="${{ needs.set-version.outputs.version }}"
206230
207231
pip install awscli==1.32.18
208232
@@ -213,9 +237,44 @@ jobs:
213237
214238
for FILENAME in "${RUNNER_TEMP}"/frameworks-ios/*.zip; do
215239
[ -e "${FILENAME}" ] || continue
216-
shasum -a 256 "${FILENAME}"
240+
FRAMEWORK_NAME=$(basename "${FILENAME}" | sed "s/-${VERSION}.zip//")
241+
CHECKSUM=$(shasum -a 256 "${FILENAME}" | cut -d ' ' -f1)
242+
echo "${FRAMEWORK_NAME} ${CHECKSUM}" >> "${RUNNER_TEMP}/checksums.txt"
217243
${AWS_CMD} "${FILENAME}" s3://ossci-ios/executorch/ --acl public-read
218244
done
245+
- name: Update SwiftPM
246+
shell: bash
247+
run: |
248+
set -eux
249+
VERSION="${{ needs.set-version.outputs.version }}"
250+
BRANCH="swiftpm-${VERSION}"
251+
252+
git checkout swiftpm
253+
254+
if git show-ref --verify --quiet refs/heads/${BRANCH}; then
255+
git checkout "${BRANCH}"
256+
else
257+
git checkout -b "${BRANCH}"
258+
fi
259+
260+
[[ -f Package.swift ]] || mv Package.swift.template Package.swift
261+
262+
sed -i "s/__VERSION__/${VERSION}/g" Package.swift
263+
264+
while read -r FRAMEWORK CHECKSUM; do
265+
sed -i "s/__SHA256_${FRAMEWORK}__/${CHECKSUM}/g" Package.swift
266+
done < "${RUNNER_TEMP}/checksums.txt"
267+
268+
if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
269+
git config --global user.name "PyTorch Bot"
270+
git config --global user.email "[email protected]"
271+
git add Package.swift
272+
git commit -am "${VERSION}"
273+
git push -f origin "${BRANCH}"
274+
else
275+
echo "Draft Package.swift:"
276+
cat Package.swift
277+
fi
219278
220279
build-benchmark-app:
221280
name: build-benchmark-app
@@ -281,5 +340,5 @@ jobs:
281340
echo "::group::Build ExecuTorch benchmark app"
282341
mkdir -p extension/benchmark/apple/Benchmark/Models
283342
${CONDA_RUN} --no-capture-output \
284-
build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}
343+
build/build_apple_llm_demo.sh "${ARTIFACTS_DIR_NAME}"
285344
echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ jobs:
7272
conda activate "${CONDA_ENV}"
7373
7474
MODEL_NAME=${{ matrix.model }}
75-
# Install requirements for llama vision
76-
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
77-
bash examples/models/llama3_2_vision/install_requirements.sh
78-
fi
7975
BUILD_TOOL=${{ matrix.build-tool }}
8076
BACKEND=${{ matrix.backend }}
8177
DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}

.github/workflows/trunk.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ jobs:
5858
bash .ci/scripts/setup-conda.sh
5959
# Setup MacOS dependencies as there is no Docker support on MacOS atm
6060
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
61-
# Install requirements for llama vision
62-
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" ]]; then
63-
${CONDA_RUN} bash examples/models/llama3_2_vision/install_requirements.sh
64-
fi
6561
# Build and test executorch
6662
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
6763
@@ -415,8 +411,6 @@ jobs:
415411
pip install -U "huggingface_hub[cli]"
416412
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
417413
pip install accelerate sentencepiece
418-
# TODO(guangyang): Switch to use released transformers library after all required patches are included
419-
pip install "git+https://github.com/huggingface/transformers.git@6cc4dfe3f1e8d421c6d6351388e06e9b123cbfe1"
420414
pip list
421415
echo "::endgroup::"
422416

backends/arm/_passes/arm_pass_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8+
# pyre-unsafe
9+
810
from typing import Optional
911

1012
import torch

backends/arm/_passes/cast_int64_pass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
# pyre-unsafe
7+
68
import torch
79
from executorch.exir.pass_base import ExportPass, PassResult
810

backends/arm/_passes/conv1d_unsqueeze_pass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8+
# pyre-unsafe
9+
810

911
import torch
1012
from executorch.backends.arm._passes.arm_pass_utils import (

backends/arm/_passes/decompose_div_pass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
# pyre-unsafe
8+
79
import torch
810
from executorch.exir.dialects._ops import ops as exir_ops
911
from executorch.exir.pass_base import ExportPass

backends/arm/_passes/decompose_layernorm_pass.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
# pyre-unsafe
8+
79
import operator
810

911
import torch

0 commit comments

Comments
 (0)