1 change: 1 addition & 0 deletions .github/workflows/pull.yml

```diff
@@ -731,6 +731,7 @@ jobs:

 git clone https://github.com/ggerganov/llama.cpp.git
 pushd llama.cpp
+git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
 make
 popd
```
Empty file.
12 changes: 6 additions & 6 deletions install/install_requirements.sh

```diff
@@ -62,13 +62,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20241002
+PYTORCH_NIGHTLY_VERSION=dev20241113

 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20241002
+VISION_NIGHTLY_VERSION=dev20241113

 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20241010
+TUNE_NIGHTLY_VERSION=dev20241126

 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 (
@@ -81,7 +81,7 @@ TUNE_NIGHTLY_VERSION=dev20241010
 # with cuda for faster execution on cuda GPUs.
 if [[ -x "$(command -v nvidia-smi)" ]];
 then
-  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121"
+  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu124"
 elif [[ -x "$(command -v rocminfo)" ]];
 then
   TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
@@ -93,7 +93,7 @@ fi
 REQUIREMENTS_TO_INSTALL=(
   torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
   torchvision=="0.20.0.${VISION_NIGHTLY_VERSION}"
-  torchtune=="0.4.0.${TUNE_NIGHTLY_VERSION}"
+  torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}"
 )

 # Install the requirements. --extra-index-url tells pip to look for package
@@ -106,7 +106,7 @@ REQUIREMENTS_TO_INSTALL=(

 (
   set -x
-  $PIP_EXECUTABLE install torchao=="0.5.0"
+  $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@2f97b0955953fa1a46594a27f0df2bc48d93e79d
 )

 if [[ -x "$(command -v nvidia-smi)" ]]; then
```
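A quick way to confirm these pins resolved as intended after running the script, as a minimal sanity-check sketch (not part of this PR). The expected strings are taken from the updated pins above; `startswith` is used because nightly wheels may carry a local build tag such as `+cu124`:

```python
import importlib.metadata

# Expected versions, assembled from the pins in install_requirements.sh above.
EXPECTED = {
    "torch": "2.6.0.dev20241113",
    "torchvision": "0.20.0.dev20241113",
    "torchtune": "0.5.0.dev20241126",
}

for package, expected in EXPECTED.items():
    try:
        installed = importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        print(f"{package}: not installed")
        continue
    # startswith tolerates local build tags appended to nightly versions.
    status = "ok" if installed.startswith(expected) else f"mismatch (expected {expected})"
    print(f"{package}=={installed}: {status}")
```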
1 change: 1 addition & 0 deletions torchchat/cli/builder.py

```diff
@@ -373,6 +373,7 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model:
         kwargs = {}
     else:
         kwargs = builder_args.gguf_kwargs
+    kwargs.setdefault("device", builder_args.device)
     model = Model.from_gguf(builder_args.gguf_path, **kwargs)
     return model
```
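The `setdefault` call means an explicit `device` passed via `gguf_kwargs` still wins; the builder's device is used only as a fallback. A minimal sketch of that precedence (`_merge_gguf_device` is a hypothetical helper, not code from this PR):

```python
def _merge_gguf_device(gguf_kwargs, builder_device):
    # Copy to avoid mutating the caller's dict; the PR's code mutates in place.
    kwargs = {} if gguf_kwargs is None else dict(gguf_kwargs)
    kwargs.setdefault("device", builder_device)  # no-op if "device" is already set
    return kwargs

# Caller-supplied device takes precedence; otherwise the builder's device flows through.
assert _merge_gguf_device(None, "cuda") == {"device": "cuda"}
assert _merge_gguf_device({"device": "cpu"}, "cuda") == {"device": "cpu"}
```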
12 changes: 9 additions & 3 deletions torchchat/utils/gguf_loader.py

```diff
@@ -570,6 +570,7 @@ def load_model_and_state_dict(
     load_state_dict: bool = True,
     load_as_quantized: bool = True,
     inner_k_tiles=8,
+    device="cpu",
 ) -> torch.nn.Module:
     """
     Parses the GGUF file and returns an nn.Module on meta device along with a state_dict
@@ -609,9 +610,14 @@
                 q, s, z = Q4_0.unpack(t)
                 scales_and_zeros = pack_scales_and_zeros(s, z)
                 q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
-                weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-                    q_uint8, inner_k_tiles
-                )
+                if torch.device(device).type == "cpu":
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
+                        q_uint8, inner_k_tiles
+                    )
+                else:
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
+                        q_uint8, inner_k_tiles
+                    )
                 state_dict[f"{fqn}.weight"] = weight_int4pack
                 state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros
```
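The new branch routes CPU targets through `_convert_weight_to_int4pack_for_cpu` while other devices keep the original `_convert_weight_to_int4pack` path; both consume the same nibble-packed `q_uint8`. The packing expression feeding them is dense, so here is a standalone sketch of what it does (illustrative values, assuming 4-bit quantized weights in [0, 15]):

```python
import torch

# Pairs of adjacent columns are fused into single bytes: the even-indexed
# 4-bit value lands in the high nibble, the odd-indexed one in the low nibble.
q = torch.tensor([[1, 2, 3, 4]], dtype=torch.int32)
q_uint8 = (q[:, ::2] << 4 | q[:, 1::2]).to(torch.uint8)
print(q_uint8)  # tensor([[18, 52]], dtype=torch.uint8) -> bytes 0x12, 0x34
```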