Add torchao (#1182)

metascroy · metascroy · commit 90ab9e388ebb · 2024-09-30T14:02:36.000-07:00
* init

* update install utils

* update

* update libs

* update torchao pin

* fix ci test

* add python et install to ci

* fix ci errors

* fixes

* fixes

* fixes

* fixes

* fixes

* fixes

* fixes
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -621,71 +621,87 @@ jobs:
           python torchchat.py remove stories15m
 
   test-mps:
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    with:
-      runner: macos-m1-stable  # neeps MPS, was macos-m1-stable
-      script: |
-        export PYTHON_VERSION="3.10"
-        set -x
-        # NS/MC: Remove previous installation of torch and torchao first
-        # as this script does not install anything into conda env but rather as system dep
-        pip3 uninstall -y torch || true
-        set -eou pipefail
-
-        pip3 uninstall -y torchao || true
-        set -eou pipefail
-
-        echo "::group::Print machine info"
-        uname -a
-        sysctl machdep.cpu.brand_string
-        sysctl machdep.cpu.core_count
-        echo "::endgroup::"
+    strategy:
+      matrix:
+        runner: [macos-m1-stable]  # this job runs with --device mps
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10.11'  # quoted: version strings must never be parsed as numbers
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ "$(uname -s)" = "Darwin" ]; then  # the sysctl keys below are only queried on macOS
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Run test
+        run: |
+          export PYTHON_VERSION="3.10"
+          set -x
+          # NS/MC: Remove previous installation of torch and torchao first
+          # as this script does not install anything into conda env but rather as system dep
+          pip3 uninstall -y torch || true
+          set -eou pipefail
 
-        echo "::group::Install requirements"
-        # Install requirements
-        ./install/install_requirements.sh
-        ls -la
-        pwd
-        pip3 list
-        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-        echo "::endgroup::"
+          pip3 uninstall -y torchao || true
+          set -eou pipefail
 
-        echo "::group::Download checkpoints"
-        (
-          mkdir -p checkpoints/stories15M
-          pushd checkpoints/stories15M
-          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
-          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-          popd
-        )
-        echo "::endgroup::"
+          echo "::group::Print machine info"
+          uname -a
+          sysctl machdep.cpu.brand_string
+          sysctl machdep.cpu.core_count
+          echo "::endgroup::"
 
-        echo "::group::Run inference"
-        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
-        export MODEL_NAME=stories15M
-        export MODEL_DIR=/tmp
+          echo "::group::Install requirements"
+          # Install requirements
+          ./install/install_requirements.sh
+          ls -la
+          pwd
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+          echo "::endgroup::"
+
+          echo "::group::Download checkpoints"
+          (
+            mkdir -p checkpoints/stories15M
+            pushd checkpoints/stories15M
+            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
+            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+            popd
+          )
+          echo "::endgroup::"
+
+          echo "::group::Run inference"
+          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+          export MODEL_NAME=stories15M
+          export MODEL_DIR=/tmp
 
-        python3 torchchat.py generate --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
 
-        echo "************************************************************"
-        echo "*** embedding"
-        echo "************************************************************"
+          echo "************************************************************"
+          echo "*** embedding"
+          echo "************************************************************"
 
-        python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
-        python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-        echo "************************************************************"
-        echo "*** linear int8"
-        echo "************************************************************"
+          echo "************************************************************"
+          echo "*** linear int8"
+          echo "************************************************************"
 
-        python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
-        python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          python3 torchchat.py generate --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-        echo "************************************************************"
-        echo "*** linear int4"
-        echo "************************************************************"
+          echo "************************************************************"
+          echo "*** linear int4"
+          echo "************************************************************"
 
-        PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
   test-gguf-util:
     strategy:
       matrix:
@@ -734,66 +750,82 @@ jobs:
 
           echo "Tests complete."
   test-mps-dtype:
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    with:
-      runner: macos-m1-stable  # needs MPS, was macos-m1-stable
-      script: |
-        export PYTHON_VERSION="3.10"
-        set -x
-        # NS/MC: Remove previous installation of torch and torchao first
-        # as this script does not install anything into conda env but rather as system dep
-        pip3 uninstall -y torch || true
-        set -eou pipefail
-
-        pip3 uninstall -y torchao || true
-        set -eou pipefail
-
-        echo "::group::Print machine info"
-        uname -a
-        sysctl machdep.cpu.brand_string
-        sysctl machdep.cpu.core_count
-        echo "::endgroup::"
+    strategy:
+      matrix:
+        runner: [macos-m1-stable]  # this job runs with --device mps
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10.11'  # quoted: version strings must never be parsed as numbers
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ "$(uname -s)" = "Darwin" ]; then  # the sysctl keys below are only queried on macOS
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Run test
+        run: |
+          export PYTHON_VERSION="3.10"
+          set -x
+          # NS/MC: Remove previous installation of torch and torchao first
+          # as this script does not install anything into conda env but rather as system dep
+          pip3 uninstall -y torch || true
+          set -eou pipefail
 
-        echo "::group::Install requirements"
-        # Install requirements
-        ./install/install_requirements.sh
-        ls -la
-        pwd
-        pip3 list
-        python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-        echo "::endgroup::"
+          pip3 uninstall -y torchao || true
+          set -eou pipefail
 
-        echo "::group::Download checkpoints"
-        (
-          mkdir -p checkpoints/stories15M
-          pushd checkpoints/stories15M
-          curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
-          curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-          popd
-        )
-        echo "::endgroup::"
+          echo "::group::Print machine info"
+          uname -a
+          sysctl machdep.cpu.brand_string
+          sysctl machdep.cpu.core_count
+          echo "::endgroup::"
 
-        echo "::group::Run inference"
-        export MODEL_PATH=checkpoints/stories15M/stories15M.pt
-        export MODEL_NAME=stories15M
-        export MODEL_DIR=/tmp
-        for DTYPE in float16 float32; do
-          # if [ $(uname -s) == Darwin ]; then
-          #   export DTYPE=float16
-          # fi
+          echo "::group::Install requirements"
+          # Install requirements
+          ./install/install_requirements.sh
+          ls -la
+          pwd
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+          echo "::endgroup::"
+
+          echo "::group::Download checkpoints"
+          (
+            mkdir -p checkpoints/stories15M
+            pushd checkpoints/stories15M
+            curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
+            curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+            popd
+          )
+          echo "::endgroup::"
+
+          echo "::group::Run inference"
+          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+          export MODEL_NAME=stories15M
+          export MODEL_DIR=/tmp
+          for DTYPE in float16 float32; do
+            # if [ $(uname -s) == Darwin ]; then
+            #   export DTYPE=float16
+            # fi
 
-          python3 torchchat.py generate --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
+            python3 torchchat.py generate --dtype ${DTYPE} --device mps --checkpoint-path ${MODEL_PATH} --temperature 0
 
-          python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-          python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-          python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-          python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+            python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int8" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0
 
-          PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
-        done
+            PYTORCH_ENABLE_MPS_FALLBACK=1 python3 torchchat.py generate --dtype ${DTYPE} --device mps --quant '{"linear:int4" : {"groupsize": 32}}' --checkpoint-path ${MODEL_PATH} --temperature 0
+          done
   compile-gguf:
     strategy:
       matrix:
@@ -918,11 +950,11 @@ jobs:
       - name: Install ExecuTorch python
         run: |
           echo "Install ExecuTorch python"
-          pushd et-build/src/executorch
-          chmod +x ./install_requirements.sh
-          chmod +x ./install_requirements.py
-          ./install_requirements.sh
-          popd
+          export TORCHCHAT_ROOT="$PWD"  # the helper functions resolve paths from the checkout root
+          export ET_BUILD_DIR="et-build"
+          ENABLE_ET_PYBIND="true"  # a run step has no positional args, so "${1:-true}" always expanded to "true"
+          source "torchchat/utils/scripts/install_utils.sh"
+          install_executorch_python_libs "$ENABLE_ET_PYBIND"
       - name: Install runner
         run: |
           echo "Installing runner"
@@ -1067,14 +1099,12 @@ jobs:
           echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
       - name: Load or install ET
         id: install-et
-        uses: actions/cache@v3
-        env:
-          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}
+        uses: actions/cache@v4
         with:
-          path: ./et-build
-          key: ${{env.cache-key}}
-          restore-keys: |
-            ${{env.cache-key}}
+          path: |  # the scripts dir is restored from cache too, so install_utils.sh must be part of the key
+            ./et-build
+            ./torchchat/utils/scripts
+          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh', '**/install_utils.sh') }}
       - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
         continue-on-error: true
         run: |
@@ -1083,11 +1113,11 @@ jobs:
       - name: Install ExecuTorch python
         run: |
           echo "Install ExecuTorch python"
-          pushd et-build/src/executorch
-          chmod +x ./install_requirements.sh
-          chmod +x ./install_requirements.py
-          ./install_requirements.sh
-          popd
+          export TORCHCHAT_ROOT="$PWD"  # the helper functions resolve paths from the checkout root
+          export ET_BUILD_DIR="et-build"
+          ENABLE_ET_PYBIND="true"  # a run step has no positional args, so "${1:-true}" always expanded to "true"
+          source "torchchat/utils/scripts/install_utils.sh"
+          install_executorch_python_libs "$ENABLE_ET_PYBIND"
       - name: Install runner
         run: |
           echo "Installing runner"
diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh
@@ -19,10 +19,4 @@ pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
 clone_executorch
 install_executorch_libs $ENABLE_ET_PYBIND
-install_executorch_python_libs $ENABLE_ET_PYBIND
-# TODO: figure out the root cause of 'AttributeError: module 'evaluate'
-# has no attribute 'utils'' error from evaluate CI jobs and remove
-# `import lm_eval` from torchchat.py since it requires a specific version
-# of numpy.
-pip install numpy=='1.26.4'
 popd
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
@@ -93,6 +93,13 @@ install_executorch_python_libs() {
       echo "Installing pybind"
       bash ./install_requirements.sh --pybind xnnpack
   fi
+
+  # TODO: figure out the root cause of 'AttributeError: module 'evaluate'
+  # has no attribute 'utils'' error from evaluate CI jobs and remove
+  # `import lm_eval` from torchchat.py since it requires a specific version
+  # of numpy.
+  pip install numpy=='1.26.4'
+
   pip3 list
   popd
 }
@@ -169,10 +176,9 @@ clone_torchao() {
   pushd ${TORCHCHAT_ROOT}/torchao-build/src
-  echo $pwd
+  pwd  # was `echo $pwd`, which expands an unset variable and prints an empty line
 
-  cp -R $HOME/fbsource/fbcode/pytorch/ao .
-  # git clone https://github.com/pytorch/ao.git
-  # cd ao
-  # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-pin.txt)
+  git clone https://github.com/pytorch/ao.git
+  cd ao
+  git checkout "$(cat "${TORCHCHAT_ROOT}/install/.pins/torchao-pin.txt")"  # pinned torchao revision
 
   popd
 }