@@ -291,6 +291,16 @@ jobs:
291291        bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" 
292292        echo "::endgroup::" 
293293
294+         echo "::group::Run inference with quantize file" 
295+         for DEVICE in cpu; do # cuda  
296+           # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'` 
297+           # follow up with torchao as a separate PR 
298+           echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot" 
299+           python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
300+           python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
301+         done 
302+         echo "::endgroup::" 
303+ 
294304test-gpu-aoti-float32 :
295305    permissions :
296306      id-token : write 
@@ -335,6 +345,11 @@ jobs:
335345        fi 
336346        echo "::endgroup::" 
337347
348+         # echo "::group::Run inference with quantize file" 
349+         # python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
350+         # python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
351+         # echo "::endgroup::" 
352+          
338353test-gpu-aoti-float16 :
339354    permissions :
340355      id-token : write 
@@ -376,10 +391,15 @@ jobs:
376391        echo "::group::Run inference with quantize file" 
377392        if [ $(uname -s) == Darwin ]; then 
378393          python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
379-               python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
394+           python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ 
380395        fi 
381396        echo "::endgroup::" 
382397
398+         # echo "::group::Run inference with quantize file" 
399+         # python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
400+         # python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
401+         # echo "::endgroup::" 
402+ 
383403test-gpu-eval-sanity-check :
384404    permissions :
385405      id-token : write 
@@ -495,10 +515,11 @@ jobs:
495515          python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
496516
497517          echo "******************************************" 
498-           echo "*** --quantize torchchat/quant_config/mobile.json ***" 
518+           echo "*** can't test --quantize torchchat/quant_config/mobile.json ***" 
519+           echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***" 
499520          echo "******************************************" 
500-           #  python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
501-           #  python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
521+           python torchchat.py export --quantize torchchat/quant_config/mobile-32 .json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
522+           python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte 
502523
503524
504525          echo "******************************************" 
@@ -1147,10 +1168,6 @@ jobs:
11471168        run : | 
11481169          echo "Installing runner" 
11491170          bash torchchat/utils/scripts/build_native.sh et link_torchao_ops 
1150- name : Install runner AOTI 
1151-         id : install-runner-aoti 
1152-         run : | 
1153-           bash torchchat/utils/scripts/build_native.sh aoti link_torchao_ops 
11541171name : Run inference 
11551172        run : | 
11561173          python torchchat.py download stories110M 
0 commit comments