Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 2530e71

Browse files
committed
misc
1 parent da0a26d commit 2530e71

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,7 @@ jobs:
731731
732732
git clone https://github.com/ggerganov/llama.cpp.git
733733
pushd llama.cpp
734+
git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
734735
make
735736
popd
736737

torchchat/cli/builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,8 @@ def _load_model_gguf(builder_args: BuilderArgs) -> Model:
373373
kwargs = {}
374374
else:
375375
kwargs = builder_args.gguf_kwargs
376+
377+
kwargs.setdefault("device", builder_args.device)
376378
model = Model.from_gguf(builder_args.gguf_path, **kwargs)
377379
return model
378380

torchchat/utils/gguf_loader.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ def load_model_and_state_dict(
570570
load_state_dict: bool = True,
571571
load_as_quantized: bool = True,
572572
inner_k_tiles=8,
573+
device="cpu",
573574
) -> torch.nn.Module:
574575
"""
575576
Parses the GGUF file and returns an nn.Module on meta device along with a state_dict
@@ -609,9 +610,15 @@ def load_model_and_state_dict(
609610
q, s, z = Q4_0.unpack(t)
610611
scales_and_zeros = pack_scales_and_zeros(s, z)
611612
q_uint8 = (q[::, ::2] << 4 | q[::, 1::2]).to(torch.uint8)
612-
weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
613-
q_uint8, inner_k_tiles
614-
)
613+
614+
if torch.device(device).type == "cpu":
615+
weight_int4pack = torch.ops.aten._convert_weight_to_int4pack_for_cpu(
616+
q_uint8, inner_k_tiles
617+
)
618+
else:
619+
weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(
620+
q_uint8, inner_k_tiles
621+
)
615622
state_dict[f"{fqn}.weight"] = weight_int4pack
616623
state_dict[f"{fqn}.scales_and_zeros"] = scales_and_zeros
617624

0 commit comments

Comments (0)