Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit c5fc5dc

Browse files
committed
Fix eval sanity check CI
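Summary: Moves the test-cpu-eval-sanity-check-float32 job onto the reusable pytorch/test-infra linux_job.yml workflow (runner linux.4xlarge), replacing its individual setup steps with a single grouped script; also adds a "# @NOCOMMIT Debug" marker to an earlier "Run validation" step. Test Plan, Reviewers, Subscribers, Tasks, Tags: not provided.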
1 parent c454026 commit c5fc5dc

File tree

1 file changed: 27 additions and 28 deletions.

.github/workflows/pull.yml

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ jobs:
123123
bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
124124
- name: Run validation
125125
run: |
126+
# @NOCOMMIT Debug
126127
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
127128
pushd ${TORCHCHAT_ROOT}
128129
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
@@ -164,39 +165,37 @@ jobs:
164165
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
165166
166167
test-cpu-eval-sanity-check-float32:
168+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
167169
name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
168170
needs: gather-models-cpu
169171
strategy:
170172
matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
171173
fail-fast: false
172-
runs-on: ${{ matrix.runner }}
173-
env:
174-
TORCHCHAT_ROOT: ${{ github.workspace }}
175-
REPO_NAME: ${{ matrix.repo_name }}
176-
steps:
177-
- name: Checkout repo
178-
uses: actions/checkout@v3
179-
- name: Setup Python
180-
uses: actions/setup-python@v4
181-
with:
182-
python-version: '3.10.11'
183-
- name: Print machine info
184-
run: |
185-
echo "$(uname -a)"
186-
- name: Install dependencies
187-
run: |
188-
./install/install_requirements.sh
189-
pip3 list
190-
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
191-
- name: Download checkpoints
192-
run: |
193-
bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
194-
- name: Run validation
195-
run: |
196-
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
197-
pushd ${TORCHCHAT_ROOT}
198-
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
199-
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
174+
with:
175+
runner: linux.4xlarge
176+
script: |
177+
echo "::group::Print machine info"
178+
uname -a
179+
echo "::endgroup::"
180+
181+
echo "::group::Install dependencies"
182+
./install/install_requirements.sh
183+
pip3 list
184+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
185+
echo "::endgroup::"
186+
187+
echo "::group::Download checkpoint"
188+
export REPO_NAME=${{ matrix.repo_name }}
189+
bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
190+
echo "::endgroup::"
191+
192+
echo "::group::Convert checkpoint"
193+
bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
194+
echo "::endgroup::"
195+
196+
echo "::group::Run eval"
197+
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
198+
echo "::endgroup::"
200199
201200
gather-models-gpu:
202201
runs-on: ubuntu-22.04

0 commit comments

Comments
 (0)