pytorch · Jack-Khuu · Sep 26, 2024 · Sep 26, 2024 · Sep 26, 2024 · Sep 26, 2024
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -123,6 +123,7 @@ jobs:
           bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
       - name: Run validation
         run: |
+          # @NOCOMMIT: temporary debug marker - remove before merging
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           pushd ${TORCHCHAT_ROOT}
           bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
@@ -164,39 +165,37 @@ jobs:
           bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float16"
 
   test-cpu-eval-sanity-check-float32:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     name: test-cpu-eval-sanity-check-float32 (${{ matrix.platform }}, ${{ matrix.model_name }})
     needs: gather-models-cpu
     strategy:
       matrix: ${{ fromJSON(needs.gather-models-cpu.outputs.models) }}
       fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    env:
-      TORCHCHAT_ROOT: ${{ github.workspace }}
-      REPO_NAME: ${{ matrix.repo_name }}
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v3
-      - name: Setup Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.10.11'
-      - name: Print machine info
-        run: |
-          echo "$(uname -a)"
-      - name: Install dependencies
-        run: |
-          ./install/install_requirements.sh
-          pip3 list
-          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-      - name: Download checkpoints
-        run: |
-          bash ${TORCHCHAT_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
-      - name: Run validation
-        run: |
-          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-          pushd ${TORCHCHAT_ROOT}
-          bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
-          bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
+    with:
+      runner: linux.4xlarge
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+        # Install project dependencies, then log the package list and the torch build.
+        echo "::group::Install dependencies"
+        ./install/install_requirements.sh
+        pip3 list
+        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+        echo "::endgroup::"
+        # Quote the matrix expansions so each one reaches the script as exactly one argument.
+        echo "::group::Download checkpoint"
+        export REPO_NAME="${{ matrix.repo_name }}"
+        bash .ci/scripts/wget_checkpoint.sh "${REPO_NAME}" "${{ matrix.resources }}"
+        echo "::endgroup::"
+        # REPO_NAME (exported above) selects which downloaded checkpoint gets converted.
+        echo "::group::Convert checkpoint"
+        bash .ci/scripts/convert_checkpoint.sh "${REPO_NAME}"
+        echo "::endgroup::"
+        # Run the float32 eval sanity check against the converted model on CPU.
+        echo "::group::Run eval"
+        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cpu" "eval_sanity_check-float32"
+        echo "::endgroup::"
 
   gather-models-gpu:
     runs-on: ubuntu-22.04

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
@@ -103,5 +103,5 @@ fi
 
 (
   set -x
-  $PIP_EXECUTABLE install lm-eval=="0.4.2"
+  $PIP_EXECUTABLE install lm-eval=="0.4.2" evaluate=="0.4.3" psutil=="6.0.0"
 )
diff --git a/torchchat.py b/torchchat.py
@@ -8,6 +8,7 @@
 import logging
 import subprocess
 import sys
+import lm_eval  # noqa
 
 from torchchat.cli.cli import (
     add_arguments_for_verb,

diff --git a/torchchat/usages/eval.py b/torchchat/usages/eval.py
@@ -10,6 +10,8 @@
 import torch._dynamo.config
 import torch._inductor.config
 
+import lm_eval  # noqa
+
 from torchchat.cli.builder import (
     _initialize_model,
     _initialize_tokenizer,
@@ -28,8 +30,6 @@
 torch._inductor.config.triton.cudagraphs = True
 torch._dynamo.config.cache_size_limit = 100000
 
-import lm_eval
-
 from lm_eval.evaluator import evaluate
 from lm_eval.models.huggingface import HFLM as eval_wrapper
 from lm_eval.tasks import get_task_dict
-Original file line number
+Diff line change
@@ Expand Up / @@ -103,5 +103,5 @@ fi @@
     (
       set -x
-      $PIP_EXECUTABLE install lm-eval=="0.4.2"
+      $PIP_EXECUTABLE install lm-eval=="0.4.2" evaluate=="0.4.3" psutil=="6.0.0"
     )