36 changes: 19 additions & 17 deletions gguf-py/gguf/gguf_reader.py
@@ -1,3 +1,4 @@
+# pyright: reportInvalidTypeForm=false
#
# GGUF file reading/modification support. For API usage information,
# please see the files scripts/ for some fairly simple examples.
@@ -15,6 +16,7 @@

from .quants import quant_shape_to_byte_shape


if __name__ == "__main__":
from pathlib import Path

@@ -104,7 +106,7 @@ class ReaderTensor(NamedTuple):
n_elements: int
n_bytes: int
data_offset: int
-data: npt.NDArray[Any]
+data: np.ndarray
field: ReaderField


@@ -181,7 +183,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
self.data_offset = offs
self._build_tensors(offs, tensors_fields)

-_DT = TypeVar('_DT', bound = npt.DTypeLike)
+_DT = TypeVar('_DT', bound = np.dtype[Any])

# Fetch a key/value metadata field by key.
def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -192,8 +194,8 @@ def get_tensor(self, idx: int) -> ReaderTensor:
return self.tensors[idx]

def _get(
-self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
-) -> npt.NDArray[Any]:
+self, offset: int, dtype: np.dtype[Any], count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+) -> np.ndarray:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
end_offs = offset + itemsize * count
@@ -213,7 +215,7 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:

def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
slen = self._get(offset, np.uint64)
-return slen, self._get(offset + 8, np.uint8, slen[0])
+return slen, self._get(offset + 8, np.uint8, slen[0].item())

def _get_field_parts(
self, orig_offs: int, raw_type: int,
@@ -230,7 +232,7 @@ def _get_field_parts(
# Check if it's a simple scalar type.
nptype = self.gguf_scalar_to_np.get(gtype)
if nptype is not None:
-val = self._get(offs, nptype)
+val = self._get(offs, np.dtype(nptype))
return int(val.nbytes), [val], [0], types
# Handle arrays.
if gtype == GGUFValueType.ARRAY:
@@ -242,7 +244,7 @@ def _get_field_parts(
data_idxs: list[int] = []
# FIXME: Handle multi-dimensional arrays properly instead of flattening
for idx in range(alen[0]):
-curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0].item())
if idx == 0:
types += curr_types
idxs_offs = len(aparts)
@@ -265,7 +267,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs += int(n_dims.nbytes)

# Get Tensor Dimension Array
-dims = self._get(offs, np.uint64, n_dims[0])
+dims = self._get(offs, np.uint64, n_dims[0].item())
offs += int(dims.nbytes)

# Get Tensor Encoding Scheme Type
@@ -292,7 +294,7 @@ def _build_fields(self, offs: int, count: int) -> int:
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
-field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0].item())
parts += field_parts
self._push_field(ReaderField(
orig_offs,
@@ -328,28 +330,28 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
n_bytes = n_elems * type_size // block_size
data_offs = int(start_offs + offset_tensor[0])
-item_type: npt.DTypeLike
+item_type: np.dtype[Any]
if ggml_type == GGMLQuantizationType.F16:
item_count = n_elems
-item_type = np.float16
+item_type = np.dtype(np.float16)
elif ggml_type == GGMLQuantizationType.F32:
item_count = n_elems
-item_type = np.float32
+item_type = np.dtype(np.float32)
elif ggml_type == GGMLQuantizationType.F64:
item_count = n_elems
-item_type = np.float64
+item_type = np.dtype(np.float64)
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
-item_type = np.int8
+item_type = np.dtype(np.int8)
elif ggml_type == GGMLQuantizationType.I16:
item_count = n_elems
-item_type = np.int16
+item_type = np.dtype(np.int16)
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
-item_type = np.int32
+item_type = np.dtype(np.int32)
elif ggml_type == GGMLQuantizationType.I64:
item_count = n_elems
-item_type = np.int64
+item_type = np.dtype(np.int64)
else:
item_count = n_bytes
item_type = np.uint8
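The reader changes above lean on two numpy 2 typing idioms. A standalone sketch, not part of the diff, with made-up values:

    import numpy as np

    # np.dtype[Any] annotations want a concrete dtype instance, so scalar
    # type classes such as np.uint64 are wrapped in np.dtype(...).
    dt = np.dtype(np.uint64)
    arr = np.array([7], dtype=dt)

    # Indexing a 1-D array yields a numpy scalar, not a Python int; .item()
    # converts it for APIs typed against plain int (as in _get_str above).
    n = arr[0]
    count: int = n.item()
    print(type(n).__name__, type(count).__name__)  # uint64 int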
5 changes: 2 additions & 3 deletions gguf-py/gguf/lazy.py
@@ -5,7 +5,6 @@
from typing import Any, Callable

import numpy as np
-from numpy.typing import DTypeLike


logger = logging.getLogger(__name__)
@@ -107,7 +106,7 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
return o

@classmethod
-def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
+def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | np.dtype[Any] | tuple[np.dtype[Any], Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
def wrapped_fn(*args, **kwargs):
if kwargs is None:
kwargs = {}
@@ -204,7 +203,7 @@ class LazyNumpyTensor(LazyBase):
shape: tuple[int, ...] # Makes the type checker happy in quants.py

@classmethod
-def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
+def meta_with_dtype_and_shape(cls, dtype: np.dtype[Any], shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
# The initial idea was to use np.nan as the fill value,
# but non-float types like np.int16 can't use that.
# So zero it is.
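For orientation, meta_with_dtype_and_shape now takes a concrete np.dtype. A simplified, self-contained sketch of the same idea (the helper name meta_like is made up here; the real classmethod lives on LazyNumpyTensor):

    from typing import Any

    import numpy as np

    # Zero-filled placeholder with the requested dtype and shape; zero is the
    # fill because integer dtypes such as np.int16 cannot represent np.nan.
    def meta_like(dtype: np.dtype[Any], shape: tuple[int, ...]) -> np.ndarray:
        return np.zeros(shape, dtype=dtype)

    meta = meta_like(np.dtype(np.int16), (2, 3))
    print(meta.dtype, meta.shape)  # int16 (2, 3)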
6 changes: 3 additions & 3 deletions gguf-py/gguf/quants.py
@@ -1,10 +1,10 @@
+# pyright: reportInvalidTypeForm=false

from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Callable, Sequence
from math import log2, ceil

-from numpy.typing import DTypeLike

from .constants import GGML_QUANT_SIZES, GGMLQuantizationType, QK_K
from .lazy import LazyNumpyTensor

@@ -26,7 +26,7 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati


# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
-def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
+def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: np.dtype[Any], oshape: tuple[int, ...]) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1]))
osize = 1
for dim in oshape:
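A rough sketch of the grouped-row pattern _apply_over_grouped_rows implements, with the new np.dtype[Any] parameter; the real function additionally splits the rows into groups before applying func, which this simplification omits:

    from typing import Any, Callable

    import numpy as np

    # Flatten to rows, apply the row-wise kernel once, cast, and reshape.
    def apply_over_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray,
                        otype: np.dtype[Any], oshape: tuple[int, ...]) -> np.ndarray:
        rows = arr.reshape((-1, arr.shape[-1]))
        return func(rows).astype(otype).reshape(oshape)

    out = apply_over_rows(lambda r: r * 0.5, np.ones((4, 8)), np.dtype(np.float16), (4, 8))
    print(out.dtype, out.shape)  # float16 (4, 8)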
2 changes: 1 addition & 1 deletion gguf-py/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.8"
numpy = ">=1.17"
numpy = ">=2.1"
tqdm = ">=4.27"
pyyaml = ">=5.1"
sentencepiece = ">=0.1.98,<=0.2.0"
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -15,8 +15,8 @@ classifiers = [
]

[tool.poetry.dependencies]
python = ">=3.9"
numpy = "^1.25.0"
python = ">=3.10,<3.14"
numpy = "^2.1"
sentencepiece = ">=0.1.98,<=0.2.0"
transformers = ">=4.35.2,<5.0.0"
protobuf = ">=4.21.0,<5.0.0"
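Since the dependency floors are now expressed as ranges rather than exact pins, a hypothetical runtime guard (assuming the packaging package is available) can fail fast in an old environment:

    import numpy as np
    from packaging.version import Version  # assumption: packaging is installed

    # Refuse to run against a numpy older than the new floor.
    if Version(np.__version__) < Version("2.1"):
        raise RuntimeError(f"numpy >= 2.1 required, found {np.__version__}")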
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
-torch~=2.2.1
+torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf_update.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
-torch~=2.2.1
+torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_legacy_llama.txt
@@ -1,4 +1,4 @@
-numpy~=1.26.4
+numpy>=2.1
sentencepiece~=0.2.0
transformers>=4.45.1,<5.0.0
gguf>=0.1.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-gguf_editor_gui.txt
@@ -1,3 +1,3 @@
-numpy~=1.26.4
+numpy>=2.1
PySide6~=6.9.0
gguf>=0.16.0
4 changes: 2 additions & 2 deletions requirements/requirements-tool_bench.txt
@@ -2,11 +2,11 @@ aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
matplotlib~=3.10.0
-numpy~=1.26.4
+numpy>=2.1
openai~=1.55.3
pandas~=2.2.3
prometheus-client~=0.20.0
-requests~=2.32.3
+requests>=2.28.1
wget~=3.2
typer~=0.15.1
seaborn~=0.13.2
27 changes: 18 additions & 9 deletions tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
@@ -6,6 +6,10 @@
import torch
import numpy as np
from gguf import *
+from typing import cast
+from torch.nn import ModuleList
+from transformers.models.clip.modeling_clip import CLIPVisionTransformer
+from transformers import PreTrainedModel
from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel, SiglipVisionModel

TEXT = "clip.text"
@@ -162,13 +166,13 @@ def bytes_to_unicode():
ftype = 0

if args.clip_model_is_siglip:
-model = SiglipVisionModel.from_pretrained(dir_model)
+model: PreTrainedModel = SiglipVisionModel.from_pretrained(dir_model)
processor = None
elif args.clip_model_is_vision or args.clip_model_is_openclip:
-model = CLIPVisionModel.from_pretrained(dir_model)
+model: PreTrainedModel = CLIPVisionModel.from_pretrained(dir_model)
processor = None
else:
-model = CLIPModel.from_pretrained(dir_model)
+model: PreTrainedModel = CLIPModel.from_pretrained(dir_model)
processor = CLIPProcessor.from_pretrained(dir_model)

fname_middle = None
@@ -350,9 +354,14 @@ def get_non_negative_vision_feature_layers(v_hparams):
# By default, we drop the last layer for llava projector
# models unless we have explicitly set vision feature layers
if feature_layers is None:
-model.vision_model.encoder.layers.pop(-1)
+vision_model = cast(CLIPVisionTransformer, model.vision_model)
+encoder_layers = vision_model.encoder.layers
+encoder_layers.pop(-1)
else:
-model.vision_model.encoder.layers = model.vision_model.encoder.layers[:max(feature_layers)]
+vision_model = cast(CLIPVisionTransformer, model.vision_model)
+encoder_layers = vision_model.encoder.layers
+encoder_layers = cast(ModuleList, encoder_layers)
+encoder_layers.__init__(encoder_layers[:max(feature_layers)])

projector = torch.load(args.llava_projector)
for name, data in projector.items():
@@ -375,24 +384,24 @@ def get_non_negative_vision_feature_layers(v_hparams):
continue

name = get_tensor_name(name)
-data = data.squeeze().numpy()
+data = np.ascontiguousarray(data.detach().cpu().squeeze().numpy())

n_dims = len(data.shape)

# ftype == 0 -> float32, ftype == 1 -> float16
ftype_cur = 0
if n_dims == 4:
print(f"tensor {name} is always saved in f16")
-data = data.astype(np.float16)
+data = np.asarray(data, dtype=np.float16)
ftype_cur = 1
elif ftype == 1:
if name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
-data = data.astype(np.float16)
+data = np.asarray(data, dtype=np.float16)
ftype_cur = 1
else:
print(" Converting to float32")
-data = data.astype(np.float32)
+data = np.asarray(data, dtype=np.float32)
ftype_cur = 0
else:
if data.dtype != np.float32:
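A standalone sketch of the hardened export path above, using a toy tensor rather than real CLIP weights: detach from autograd, move to CPU, squeeze singleton dims, and force a contiguous buffer, with np.asarray(..., dtype=...) as a typing-friendly counterpart of .astype:

    import numpy as np
    import torch

    t = torch.zeros(1, 4, 4, requires_grad=True)  # toy stand-in for a weight

    # detach -> cpu -> squeeze -> contiguous numpy buffer, as in the diff above
    data = np.ascontiguousarray(t.detach().cpu().squeeze().numpy())
    data16 = np.asarray(data, dtype=np.float16)   # same effect as data.astype(np.float16)
    print(data16.shape, data16.dtype)             # (4, 4) float16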
4 changes: 2 additions & 2 deletions tools/mtmd/requirements.txt
@@ -1,5 +1,5 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0
-torch~=2.2.1
-torchvision~=0.17.1
+torch>=2.5
+torchvision>=0.20.1
4 changes: 2 additions & 2 deletions tools/server/tests/requirements.txt
@@ -1,8 +1,8 @@
aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
-numpy~=1.26.4
+numpy>=2.1
openai~=1.55.3
prometheus-client~=0.20.0
-requests~=2.32.3
+requests>=2.28.1
wget~=3.2