36 changes: 19 additions & 17 deletions gguf-py/gguf/gguf_reader.py
@@ -1,3 +1,4 @@
+# pyright: reportInvalidTypeForm=false
#
# GGUF file reading/modification support. For API usage information,
# please see the files scripts/ for some fairly simple examples.
@@ -15,6 +16,7 @@

from .quants import quant_shape_to_byte_shape


if __name__ == "__main__":
from pathlib import Path

@@ -104,7 +106,7 @@ class ReaderTensor(NamedTuple):
n_elements: int
n_bytes: int
data_offset: int
-data: npt.NDArray[Any]
+data: np.ndarray
field: ReaderField


@@ -181,7 +183,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
self.data_offset = offs
self._build_tensors(offs, tensors_fields)

-_DT = TypeVar('_DT', bound = npt.DTypeLike)
+_DT = TypeVar('_DT', bound = np.dtype[Any])

# Fetch a key/value metadata field by key.
def get_field(self, key: str) -> Union[ReaderField, None]:
@@ -192,8 +194,8 @@ def get_tensor(self, idx: int) -> ReaderTensor:
return self.tensors[idx]

def _get(
-self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
-) -> npt.NDArray[Any]:
+self, offset: int, dtype: np.dtype[Any], count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+) -> np.ndarray:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
end_offs = offset + itemsize * count
@@ -213,7 +215,7 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:

def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
slen = self._get(offset, np.uint64)
-return slen, self._get(offset + 8, np.uint8, slen[0])
+return slen, self._get(offset + 8, np.uint8, slen[0].item())

def _get_field_parts(
self, orig_offs: int, raw_type: int,
@@ -230,7 +232,7 @@ def _get_field_parts(
# Check if it's a simple scalar type.
nptype = self.gguf_scalar_to_np.get(gtype)
if nptype is not None:
-val = self._get(offs, nptype)
+val = self._get(offs, np.dtype(nptype))
return int(val.nbytes), [val], [0], types
# Handle arrays.
if gtype == GGUFValueType.ARRAY:
@@ -242,7 +244,7 @@ def _get_field_parts(
data_idxs: list[int] = []
# FIXME: Handle multi-dimensional arrays properly instead of flattening
for idx in range(alen[0]):
-curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0].item())
if idx == 0:
types += curr_types
idxs_offs = len(aparts)
@@ -265,7 +267,7 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs += int(n_dims.nbytes)

# Get Tensor Dimension Array
-dims = self._get(offs, np.uint64, n_dims[0])
+dims = self._get(offs, np.uint64, n_dims[0].item())
offs += int(dims.nbytes)

# Get Tensor Encoding Scheme Type
@@ -292,7 +294,7 @@ def _build_fields(self, offs: int, count: int) -> int:
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
-field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0].item())
parts += field_parts
self._push_field(ReaderField(
orig_offs,
@@ -328,28 +330,28 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
n_bytes = n_elems * type_size // block_size
data_offs = int(start_offs + offset_tensor[0])
-item_type: npt.DTypeLike
+item_type: np.dtype[Any]
if ggml_type == GGMLQuantizationType.F16:
item_count = n_elems
-item_type = np.float16
+item_type = np.dtype(np.float16)
elif ggml_type == GGMLQuantizationType.F32:
item_count = n_elems
-item_type = np.float32
+item_type = np.dtype(np.float32)
elif ggml_type == GGMLQuantizationType.F64:
item_count = n_elems
-item_type = np.float64
+item_type = np.dtype(np.float64)
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
-item_type = np.int8
+item_type = np.dtype(np.int8)
elif ggml_type == GGMLQuantizationType.I16:
item_count = n_elems
-item_type = np.int16
+item_type = np.dtype(np.int16)
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
-item_type = np.int32
+item_type = np.dtype(np.int32)
elif ggml_type == GGMLQuantizationType.I64:
item_count = n_elems
-item_type = np.int64
+item_type = np.dtype(np.int64)
else:
item_count = n_bytes
item_type = np.uint8
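The reader changes above lean on two numpy 2 typing idioms. A standalone sketch, not part of the diff, with made-up values:

    import numpy as np

    # np.dtype[Any] annotations want a concrete dtype instance, so scalar
    # type classes such as np.uint64 are wrapped in np.dtype(...).
    dt = np.dtype(np.uint64)
    arr = np.array([7], dtype=dt)

    # Indexing a 1-D array yields a numpy scalar, not a Python int; .item()
    # converts it for APIs typed against plain int (as in _get_str above).
    n = arr[0]
    count: int = n.item()
    print(type(n).__name__, type(count).__name__)  # uint64 int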
5 changes: 2 additions & 3 deletions gguf-py/gguf/lazy.py
@@ -5,7 +5,6 @@
from typing import Any, Callable

import numpy as np
-from numpy.typing import DTypeLike


logger = logging.getLogger(__name__)
@@ -107,7 +106,7 @@ def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
return o

@classmethod
-def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
+def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | np.dtype[Any] | tuple[np.dtype[Any], Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
def wrapped_fn(*args, **kwargs):
if kwargs is None:
kwargs = {}
@@ -204,7 +203,7 @@ class LazyNumpyTensor(LazyBase):
shape: tuple[int, ...] # Makes the type checker happy in quants.py

@classmethod
-def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
+def meta_with_dtype_and_shape(cls, dtype: np.dtype[Any], shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
# The initial idea was to use np.nan as the fill value,
# but non-float types like np.int16 can't use that.
# So zero it is.
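For orientation, meta_with_dtype_and_shape now takes a concrete np.dtype. A simplified, self-contained sketch of the same idea (the helper name meta_like is made up here; the real classmethod lives on LazyNumpyTensor):

    from typing import Any

    import numpy as np

    # Zero-filled placeholder with the requested dtype and shape; zero is the
    # fill because integer dtypes such as np.int16 cannot represent np.nan.
    def meta_like(dtype: np.dtype[Any], shape: tuple[int, ...]) -> np.ndarray:
        return np.zeros(shape, dtype=dtype)

    meta = meta_like(np.dtype(np.int16), (2, 3))
    print(meta.dtype, meta.shape)  # int16 (2, 3)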
6 changes: 3 additions & 3 deletions gguf-py/gguf/quants.py
@@ -1,10 +1,10 @@
+# pyright: reportInvalidTypeForm=false

from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Callable, Sequence
from math import log2, ceil

-from numpy.typing import DTypeLike

from .constants import GGML_QUANT_SIZES, GGMLQuantizationType, QK_K
from .lazy import LazyNumpyTensor

@@ -26,7 +26,7 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati


# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
-def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
+def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: np.dtype[Any], oshape: tuple[int, ...]) -> np.ndarray:
rows = arr.reshape((-1, arr.shape[-1]))
osize = 1
for dim in oshape:
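A rough sketch of the grouped-row pattern _apply_over_grouped_rows implements, with the new np.dtype[Any] parameter; the real function additionally splits the rows into groups before applying func, which this simplification omits:

    from typing import Any, Callable

    import numpy as np

    # Flatten to rows, apply the row-wise kernel once, cast, and reshape.
    def apply_over_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray,
                        otype: np.dtype[Any], oshape: tuple[int, ...]) -> np.ndarray:
        rows = arr.reshape((-1, arr.shape[-1]))
        return func(rows).astype(otype).reshape(oshape)

    out = apply_over_rows(lambda r: r * 0.5, np.ones((4, 8)), np.dtype(np.float16), (4, 8))
    print(out.dtype, out.shape)  # float16 (4, 8)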
2 changes: 1 addition & 1 deletion gguf-py/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.8"
numpy = ">=1.17"
numpy = ">=2.1"
tqdm = ">=4.27"
pyyaml = ">=5.1"
sentencepiece = ">=0.1.98,<=0.2.0"
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -15,8 +15,8 @@ classifiers = [
]

[tool.poetry.dependencies]
python = ">=3.9"
numpy = "^1.25.0"
python = ">=3.10,<3.14"
numpy = "^2.1"
sentencepiece = ">=0.1.98,<=0.2.0"
transformers = ">=4.35.2,<5.0.0"
protobuf = ">=4.21.0,<5.0.0"
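Since the dependency floors are now expressed as ranges rather than exact pins, a hypothetical runtime guard (assuming the packaging package is available) can fail fast in an old environment:

    import numpy as np
    from packaging.version import Version  # assumption: packaging is installed

    # Refuse to run against a numpy older than the new floor.
    if Version(np.__version__) < Version("2.1"):
        raise RuntimeError(f"numpy >= 2.1 required, found {np.__version__}")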
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
-torch~=2.2.1
+torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf_update.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
-torch~=2.2.1
+torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_legacy_llama.txt
@@ -1,4 +1,4 @@
-numpy~=1.26.4
+numpy>=2.1
sentencepiece~=0.2.0
transformers>=4.45.1,<5.0.0
gguf>=0.1.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-gguf_editor_gui.txt
@@ -1,3 +1,3 @@
-numpy~=1.26.4
+numpy>=2.1
PySide6~=6.9.0
gguf>=0.16.0
4 changes: 2 additions & 2 deletions requirements/requirements-tool_bench.txt
@@ -2,11 +2,11 @@ aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
matplotlib~=3.10.0
-numpy~=1.26.4
+numpy>=2.1
openai~=1.55.3
pandas~=2.2.3
prometheus-client~=0.20.0
-requests~=2.32.3
+requests>=2.28.1
wget~=3.2
typer~=0.15.1
seaborn~=0.13.2
27 changes: 18 additions & 9 deletions tools/mtmd/legacy-models/convert_image_encoder_to_gguf.py
@@ -6,6 +6,10 @@
import torch
import numpy as np
from gguf import *
+from typing import cast
+from torch.nn import ModuleList
+from transformers.models.clip.modeling_clip import CLIPVisionTransformer
+from transformers import PreTrainedModel
from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel, SiglipVisionModel

TEXT = "clip.text"
@@ -162,13 +166,13 @@ def bytes_to_unicode():
ftype = 0

if args.clip_model_is_siglip:
-model = SiglipVisionModel.from_pretrained(dir_model)
+model: PreTrainedModel = SiglipVisionModel.from_pretrained(dir_model)
processor = None
elif args.clip_model_is_vision or args.clip_model_is_openclip:
-model = CLIPVisionModel.from_pretrained(dir_model)
+model: PreTrainedModel = CLIPVisionModel.from_pretrained(dir_model)
processor = None
else:
-model = CLIPModel.from_pretrained(dir_model)
+model: PreTrainedModel = CLIPModel.from_pretrained(dir_model)
processor = CLIPProcessor.from_pretrained(dir_model)

fname_middle = None
@@ -350,9 +354,14 @@ def get_non_negative_vision_feature_layers(v_hparams):
# By default, we drop the last layer for llava projector
# models unless we have explicitly set vision feature layers
if feature_layers is None:
-model.vision_model.encoder.layers.pop(-1)
+vision_model = cast(CLIPVisionTransformer, model.vision_model)
+encoder_layers = vision_model.encoder.layers
+encoder_layers.pop(-1)
else:
-model.vision_model.encoder.layers = model.vision_model.encoder.layers[:max(feature_layers)]
+vision_model = cast(CLIPVisionTransformer, model.vision_model)
+encoder_layers = vision_model.encoder.layers
+encoder_layers = cast(ModuleList, encoder_layers)
+encoder_layers.__init__(encoder_layers[:max(feature_layers)])

projector = torch.load(args.llava_projector)
for name, data in projector.items():
@@ -375,24 +384,24 @@ def get_non_negative_vision_feature_layers(v_hparams):
continue

name = get_tensor_name(name)
-data = data.squeeze().numpy()
+data = np.ascontiguousarray(data.detach().cpu().squeeze().numpy())

n_dims = len(data.shape)

# ftype == 0 -> float32, ftype == 1 -> float16
ftype_cur = 0
if n_dims == 4:
print(f"tensor {name} is always saved in f16")
-data = data.astype(np.float16)
+data = np.asarray(data, dtype=np.float16)
ftype_cur = 1
elif ftype == 1:
if name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
-data = data.astype(np.float16)
+data = np.asarray(data, dtype=np.float16)
ftype_cur = 1
else:
print(" Converting to float32")
-data = data.astype(np.float32)
+data = np.asarray(data, dtype=np.float32)
ftype_cur = 0
else:
if data.dtype != np.float32:
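A standalone sketch of the hardened export path above, using a toy tensor rather than real CLIP weights: detach from autograd, move to CPU, squeeze singleton dims, and force a contiguous buffer, with np.asarray(..., dtype=...) as a typing-friendly counterpart of .astype:

    import numpy as np
    import torch

    t = torch.zeros(1, 4, 4, requires_grad=True)  # toy stand-in for a weight

    # detach -> cpu -> squeeze -> contiguous numpy buffer, as in the diff above
    data = np.ascontiguousarray(t.detach().cpu().squeeze().numpy())
    data16 = np.asarray(data, dtype=np.float16)   # same effect as data.astype(np.float16)
    print(data16.shape, data16.dtype)             # (4, 4) float16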
4 changes: 2 additions & 2 deletions tools/mtmd/requirements.txt
@@ -1,5 +1,5 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0
-torch~=2.2.1
-torchvision~=0.17.1
+torch>=2.5
+torchvision>=0.20.1
4 changes: 2 additions & 2 deletions tools/server/tests/requirements.txt
@@ -1,8 +1,8 @@
aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
-numpy~=1.26.4
+numpy>=2.1
openai~=1.55.3
prometheus-client~=0.20.0
-requests~=2.32.3
+requests>=2.28.1
wget~=3.2