@@ -123,6 +123,7 @@ jobs:
123123 bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
124124 - name : Run validation
125125 run : |
126+ # @NOCOMMIT Debug
126127 python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
127128 pushd ${TORCHCHAT_ROOT}
128129 bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
@@ -164,39 +165,37 @@ jobs:
164165 bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
165166
# Float32 eval sanity check on CPU.
#
# Runs once per entry of the model matrix published by `gather-models-cpu`
# and delegates execution to the shared `linux_job.yml` reusable workflow.
# Each run installs dependencies, downloads and converts the checkpoint for
# `matrix.repo_name`, then calls `validate.sh` on the "cpu" device with the
# "eval_sanity_check-float32" target.
test-cpu-eval-sanity-check-float32:
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
  needs: gather-models-cpu
  strategy:
    matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
    # Keep validating the remaining models when one of them fails.
    fail-fast: false
  with:
    # NOTE(review): the runner is fixed here, while the job name still shows
    # `matrix.platform` and the previous version used `matrix.runner` --
    # confirm that every matrix entry is meant to run on this Linux runner.
    runner: linux.4xlarge
    # NOTE(review): the previous version pinned Python 3.10.11 through
    # actions/setup-python; this script relies on whatever interpreter the
    # reusable workflow provides -- confirm it is compatible.
    script: |
      echo "::group::Print machine info"
      uname -a
      echo "::endgroup::"

      echo "::group::Install dependencies"
      ./install/install_requirements.sh
      pip3 list
      python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
      echo "::endgroup::"

      echo "::group::Download checkpoint"
      # Matrix values are pasted into the script as text before the shell
      # runs, so they are quoted to reach the helper as single arguments
      # (no word splitting or glob expansion).
      export REPO_NAME="${{ matrix.repo_name }}"
      bash .ci/scripts/wget_checkpoint.sh "${REPO_NAME}" "${{ matrix.resources }}"
      echo "::endgroup::"

      echo "::group::Convert checkpoint"
      bash .ci/scripts/convert_checkpoint.sh "${REPO_NAME}"
      echo "::endgroup::"

      echo "::group::Run eval"
      bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
      echo "::endgroup::"
200199
201200 gather-models-gpu :
202201 runs-on : ubuntu-22.04
0 commit comments