This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit ce41944

Move PT pin and AO commit hash to benefit from SDPA MPS implementation (#964)
* Move PT pin and AO commit hash to benefit from SDPA MPS implementation in PT
* Move PT nightly to 2024-07-28
* Shuffle weights prior to _convert_weight_to_int4pack call in gguf_loader
* Uninstall torchao on M1 before re-installing it
* Update ET pin
* Uninstall torchao on M1 before re-installing it
* Update ET pin
1 parent fe73ef7 commit ce41944

5 files changed: +16 -9 lines changed

.github/workflows/pull.yml

Lines changed: 9 additions & 4 deletions
@@ -627,11 +627,14 @@ jobs:
       runner: macos-m1-stable # neeps MPS, was macos-m1-stable
       script: |
         set -x
-        # NS: Remove previous installation of torch first
-        # as this script does not isntall anything into conda env but rather as system dep
+        # NS/MC: Remove previous installation of torch and torchao first
+        # as this script does not install anything into conda env but rather as system dep
         pip3 uninstall -y torch || true
         set -eou pipefail

+        pip3 uninstall -y torchao || true
+        set -eou pipefail
+
         echo "::group::Print machine info"
         uname -a
         sysctl machdep.cpu.brand_string
@@ -736,10 +739,12 @@ jobs:
       runner: macos-m1-stable # needs MPS, was macos-m1-stable
       script: |
         set -x
-        # NS: Remove previous installation of torch first
-        # as this script does not isntall anything into conda env but rather as system dep
+        # NS/MC: Remove previous installation of torch and torchao first
+        # as this script does not install anything into conda env but rather as system dep
         pip3 uninstall -y torch || true
+        set -eou pipefail

+        pip3 uninstall -y torchao || true
         set -eou pipefail

         echo "::group::Print machine info"

.pins/et-pin.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-c7574994ecd775fdaacc0f2de27089526e05b108
+91298923a0076c1b41059efb6dad2876426e4b03

build/gguf_loader.py

Lines changed: 2 additions & 1 deletion
@@ -170,8 +170,9 @@ def load_model_and_state_dict(
         if load_state_dict:
             q, s, z = Q4_0.unpack(t)
             scales_and_zeros = pack_scales_and_zeros(s, z)
+            q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
             weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-                q, inner_k_tiles
+                q_uint8, inner_k_tiles
             )
             state_dict[f"{fqn}.weight"] = weight_int4pack
             state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros

install_requirements.sh

Lines changed: 2 additions & 2 deletions
@@ -47,7 +47,7 @@ fi
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-NIGHTLY_VERSION=dev20240710
+NIGHTLY_VERSION=dev20240728

 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 (
@@ -82,7 +82,7 @@ REQUIREMENTS_TO_INSTALL=(
 # TODO: Remove this and install nightly build, once it supports macos
 (
   set -x
-  $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@d36de1b144b73bf753bd082109c2b5d0141abd5b
+  $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@d477c0e59b458b5617dcb3e999290a87df3070d8
 )
 if [[ -x "$(command -v nvidia-smi)" ]]; then
 (
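The NIGHTLY_VERSION bump above is how the commit picks up the SDPA MPS implementation mentioned in its title. A small smoke test, assuming an Apple-silicon machine and a nightly at least as new as dev20240728; the snippet is illustrative and not part of the repo:

import torch
import torch.nn.functional as F

if torch.backends.mps.is_available():
    # (batch, heads, seq_len, head_dim) in half precision on the MPS device.
    q = torch.randn(1, 4, 64, 16, device="mps", dtype=torch.float16)
    out = F.scaled_dot_product_attention(q, q, q)
    print("SDPA on MPS:", out.shape, out.dtype)
else:
    print("MPS not available on", torch.__version__)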

quantization/qops.py

Lines changed: 2 additions & 1 deletion
@@ -401,8 +401,9 @@ def _prepare_weight_and_scales_and_zeros(
     weight_int32, scales_and_zeros = group_quantize_tensor(
         weight_bf16, n_bit=4, groupsize=groupsize
     )
+    weight_uint8 = (weight_int32[::, ::2] << 4 | weight_int32[::, 1::2]).to(torch.uint8)
     weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-        weight_int32, inner_k_tiles
+        weight_uint8, inner_k_tiles
     )
     return weight_int4pack, scales_and_zeros

