1 change: 1 addition & 0 deletions .github/workflows/pull.yml

```diff
@@ -731,6 +731,7 @@ jobs:

 git clone https://github.com/ggerganov/llama.cpp.git
 pushd llama.cpp
+git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
 make
 popd
```
Empty file.
12 changes: 6 additions & 6 deletions install/install_requirements.sh

```diff
@@ -62,13 +62,13 @@ echo "Using pip executable: $PIP_EXECUTABLE"
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20241002
+PYTORCH_NIGHTLY_VERSION=dev20241113

 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20241002
+VISION_NIGHTLY_VERSION=dev20241113

 # Nightly version for torchtune
-TUNE_NIGHTLY_VERSION=dev20241010
+TUNE_NIGHTLY_VERSION=dev20241126

 # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
 (
@@ -81,7 +81,7 @@ TUNE_NIGHTLY_VERSION=dev20241010
 # with cuda for faster execution on cuda GPUs.
 if [[ -x "$(command -v nvidia-smi)" ]];
 then
-  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121"
+  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu124"
 elif [[ -x "$(command -v rocminfo)" ]];
 then
   TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
@@ -93,7 +93,7 @@ fi
 REQUIREMENTS_TO_INSTALL=(
   torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
   torchvision=="0.20.0.${VISION_NIGHTLY_VERSION}"
-  torchtune=="0.4.0.${TUNE_NIGHTLY_VERSION}"
+  torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}"
 )

 # Install the requirements. --extra-index-url tells pip to look for package
@@ -106,7 +106,7 @@ REQUIREMENTS_TO_INSTALL=(

 (
   set -x
-  $PIP_EXECUTABLE install torchao=="0.5.0"
+  $PIP_EXECUTABLE install git+https://github.com/pytorch/ao.git@2f97b0955953fa1a46594a27f0df2bc48d93e79d
 )

 if [[ -x "$(command -v nvidia-smi)" ]]; then
```
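A quick way to confirm these pins resolved as intended after running the script, as a minimal sanity-check sketch (not part of this PR). The expected strings are taken from the updated pins above; `startswith` is used because nightly wheels may carry a local build tag such as `+cu124`:

```python
import importlib.metadata

# Expected versions, assembled from the pins in install_requirements.sh above.
EXPECTED = {
    "torch": "2.6.0.dev20241113",
    "torchvision": "0.20.0.dev20241113",
    "torchtune": "0.5.0.dev20241126",
}

for package, expected in EXPECTED.items():
    try:
        installed = importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError:
        print(f"{package}: not installed")
        continue
    # startswith tolerates local build tags appended to nightly versions.
    status = "ok" if installed.startswith(expected) else f"mismatch (expected {expected})"
    print(f"{package}=={installed}: {status}")
```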
1 change: 1 addition & 0 deletions torchchat/cli/builder.py

```diff
@@ -373,6 +373,7 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model:
         kwargs = {}
     else:
         kwargs = builder_args.gguf_kwargs
+    kwargs.setdefault("device", builder_args.device)
     model = Model.from_gguf(builder_args.gguf_path, **kwargs)
     return model
```
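The `setdefault` call means an explicit `device` passed via `gguf_kwargs` still wins; the builder's device is used only as a fallback. A minimal sketch of that precedence (`_merge_gguf_device` is a hypothetical helper, not code from this PR):

```python
def _merge_gguf_device(gguf_kwargs, builder_device):
    # Copy to avoid mutating the caller's dict; the PR's code mutates in place.
    kwargs = {} if gguf_kwargs is None else dict(gguf_kwargs)
    kwargs.setdefault("device", builder_device)  # no-op if "device" is already set
    return kwargs

# Caller-supplied device takes precedence; otherwise the builder's device flows through.
assert _merge_gguf_device(None, "cuda") == {"device": "cuda"}
assert _merge_gguf_device({"device": "cpu"}, "cuda") == {"device": "cpu"}
```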
12 changes: 9 additions & 3 deletions torchchat/utils/gguf_loader.py

```diff
@@ -570,6 +570,7 @@ def load_model_and_state_dict(
     load_state_dict: bool = True,
     load_as_quantized: bool = True,
     inner_k_tiles=8,
+    device="cpu",
 ) -> torch.nn.Module:
     """
     Parses the GGUF file and returns an nn.Module on meta device along with a state_dict
@@ -609,9 +610,14 @@
                 q, s, z = Q4_0.unpack(t)
                 scales_and_zeros = pack_scales_and_zeros(s, z)
                 q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
-                weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
-                    q_uint8, inner_k_tiles
-                )
+                if torch.device(device).type == "cpu":
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
+                        q_uint8, inner_k_tiles
+                    )
+                else:
+                    weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
+                        q_uint8, inner_k_tiles
+                    )
                 state_dict[f"{fqn}.weight"] = weight_int4pack
                 state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros
```
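The new branch routes CPU targets through `_convert_weight_to_int4pack_for_cpu` while other devices keep the original `_convert_weight_to_int4pack` path; both consume the same nibble-packed `q_uint8`. The packing expression feeding them is dense, so here is a standalone sketch of what it does (illustrative values, assuming 4-bit quantized weights in [0, 15]):

```python
import torch

# Pairs of adjacent columns are fused into single bytes: the even-indexed
# 4-bit value lands in the high nibble, the odd-indexed one in the low nibble.
q = torch.tensor([[1, 2, 3, 4]], dtype=torch.int32)
q_uint8 = (q[:, ::2] << 4 | q[:, 1::2]).to(torch.uint8)
print(q_uint8)  # tensor([[18, 52]], dtype=torch.uint8) -> bytes 0x12, 0x34
```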