diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index ee7270a5d..c48436a80 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -731,6 +731,7 @@ jobs:
 
         git clone https://github.com/ggerganov/llama.cpp.git
         pushd llama.cpp
+        git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
         make
         popd
diff --git a/.watchman-cookie-jackkhuu-mbp-1567-1101 b/.watchman-cookie-jackkhuu-mbp-1567-1101
new file mode 100755
index 000000000..e69de29bb
diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index 635789de6..649a80ef6 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -62,13 +62,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20241002
+PYTORCH_NIGHTLY_VERSION=dev20241113
 
 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20241002
+VISION_NIGHTLY_VERSION=dev20241113
 
 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20241010
+TUNE_NIGHTLY_VERSION=dev20241126
 
 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 (
@@ -81,7 +81,7 @@ TUNE_NIGHTLY_VERSION=dev20241010
 # with cuda for faster execution on cuda GPUs.
 if [[ -x "$(command -v nvidia-smi)" ]];
 then
-  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121"
+  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu124"
 elif [[ -x "$(command -v rocminfo)" ]];
 then
   TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
@@ -93,7 +93,7 @@ fi
 REQUIREMENTS_TO_INSTALL=(
   torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
   torchvision=="0.20.0.${VISION_NIGHTLY_VERSION}"
-  torchtune=="0.4.0.${TUNE_NIGHTLY_VERSION}"
+  torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}"
 )
 
 # Install the requirements. --extra-index-url tells pip to look for package
@@ -106,7 +106,7 @@ REQUIREMENTS_TO_INSTALL=(
 (
   set -x
-  $PIP_EXECUTABLE install torchao=="0.5.0"
+  $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@2f97b0955953fa1a46594a27f0df2bc48d93e79d
 )
 
 if [[ -x "$(command -v nvidia-smi)" ]]; then
diff --git a/torchchat/cli/builder.py b/torchchat/cli/builder.py
index a39a2ed95..629eae8e5 100644
--- a/torchchat/cli/builder.py
+++ b/torchchat/cli/builder.py
@@ -373,6 +373,7 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model:
         kwargs = {}
     else:
         kwargs = builder_args.gguf_kwargs
+    kwargs.setdefault("device", builder_args.device)
     model = Model.from_gguf(builder_args.gguf_path, **kwargs)
     return model
diff --git a/torchchat/utils/gguf_loader.py b/torchchat/utils/gguf_loader.py
index 309ff807c..32a7a92b2 100644
--- a/torchchat/utils/gguf_loader.py
+++ b/torchchat/utils/gguf_loader.py
@@ -570,6 +570,7 @@ def load_model_and_state_dict(
     load_state_dict: bool = True,
     load_as_quantized: bool = True,
     inner_k_tiles=8,
+    device="cpu",
 ) -> torch.nn.Module:
     """
     Parses the GGUF file and returns an nn.Module on meta device along with a state_dict
@@ -609,9 +610,14 @@ def load_model_and_state_dict(
             q, s, z = Q4_0.unpack(t)
             scales_and_zeros = pack_scales_and_zeros(s, z)
             q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
-            weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-                q_uint8, inner_k_tiles
-            )
+            if torch.device(device).type == "cpu":
+                weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
+                    q_uint8, inner_k_tiles
+                )
+            else:
+                weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
+                    q_uint8, inner_k_tiles
+                )
             state_dict[f"{fqn}.weight"] = weight_int4pack
             state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros
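Note on the torchchat/cli/builder.py hunk: the new kwargs.setdefault call threads the CLI-selected device into Model.from_gguf without overriding a device the user already passed via gguf_kwargs. A minimal standalone sketch of that fallback behavior, using a toy BuilderArgs stand-in (the real torchchat class has many more fields; resolve_gguf_kwargs is a hypothetical helper name):

# Illustrative only: toy stand-in for torchchat's BuilderArgs.
from dataclasses import dataclass
from typing import Optional

@dataclass
class BuilderArgs:
    gguf_path: str = "model.gguf"
    device: str = "cuda"
    gguf_kwargs: Optional[dict] = None

def resolve_gguf_kwargs(builder_args: BuilderArgs) -> dict:
    # Mirrors the diff: start from user-supplied gguf_kwargs if present,
    # then fall back to the builder's device without clobbering an
    # explicit "device" entry.
    if builder_args.gguf_kwargs is None:
        kwargs = {}
    else:
        kwargs = dict(builder_args.gguf_kwargs)
    kwargs.setdefault("device", builder_args.device)
    return kwargs

print(resolve_gguf_kwargs(BuilderArgs()))                               # {'device': 'cuda'}
print(resolve_gguf_kwargs(BuilderArgs(gguf_kwargs={"device": "cpu"})))  # {'device': 'cpu'}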
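Note on the torchchat/utils/gguf_loader.py hunk: recent PyTorch nightlies provide a CPU-specific int4 packing op (_convert_weight_to_int4pack_for_cpu) alongside the original _convert_weight_to_int4pack, and the two produce different packed layouts, so the loader now branches on the target device. A standalone sketch of that path, assuming a nightly that exposes both aten ops exactly as the diff uses them; the helper name pack_int4_weight is hypothetical:

import torch

def pack_int4_weight(q: torch.Tensor, inner_k_tiles: int = 8, device: str = "cpu") -> torch.Tensor:
    # q holds unpacked 4-bit values (as produced by Q4_0.unpack in the diff).
    # Pack two values per byte: even columns become the high nibble, odd
    # columns the low nibble, matching the q_uint8 construction above.
    q_uint8 = (q[:, ::2] << 4 | q[:, 1::2]).to(torch.uint8)
    if torch.device(device).type == "cpu":
        # CPU kernels in recent nightlies expect their own packed layout.
        return torch.ops.aten._convert_weight_to_int4pack_for_cpu(q_uint8, inner_k_tiles)
    # CUDA and other devices keep using the original packing op.
    return torch.ops.aten._convert_weight_to_int4pack(q_uint8, inner_k_tiles)

Because the packed layouts differ, the target device must be known at load time; that is why builder.py plumbs builder_args.device into the GGUF kwargs instead of repacking the weights later.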