2 changes: 1 addition & 1 deletion examples/convert_legacy_llama.py
@@ -1308,7 +1308,7 @@ def do_dump_model(model_plus: ModelPlus) -> None:

def main(args_in: list[str] | None = None) -> None:
output_choices = ["f32", "f16"]
- if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+ if np.uint32(1) == np.uint32(1).view(np.dtype(np.uint32).newbyteorder("<")):
# We currently only support Q8_0 output on little endian systems.
output_choices.append("q8_0")
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
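NumPy 2.0 removed `.newbyteorder()` from arrays and scalars; only `np.dtype.newbyteorder()` survives, which is why the check is rewritten as a view through a byte-order-qualified dtype. A minimal standalone sketch of the same little-endian probe (equivalent in spirit to checking `sys.byteorder == "little"`):

```python
import numpy as np

# Reinterpret a native-order scalar through an explicitly little-endian
# dtype; the comparison only holds on little-endian hosts.
native_one = np.uint32(1)
le_uint32 = np.dtype(np.uint32).newbyteorder("<")
is_little_endian = bool(native_one == native_one.view(le_uint32))
print("little-endian host" if is_little_endian else "big-endian host")
```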
50 changes: 25 additions & 25 deletions gguf-py/gguf/gguf_reader.py
@@ -1,4 +1,3 @@
- #
# GGUF file reading/modification support. For API usage information,
# please see the files scripts/ for some fairly simple examples.
#
@@ -15,6 +14,7 @@

from .quants import quant_shape_to_byte_shape


if __name__ == "__main__":
from pathlib import Path

@@ -134,12 +134,12 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
offs = 0

# Check for GGUF magic
- if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+ if self._get(offs, np.dtype(np.uint32), override_order = '<')[0] != GGUF_MAGIC:
raise ValueError('GGUF magic invalid')
offs += 4

# Check GGUF version
- temp_version = self._get(offs, np.uint32)
+ temp_version = self._get(offs, np.dtype(np.uint32))
if temp_version[0] & 65535 == 0:
# If we get 0 here that means it's (probably) a GGUF file created for
# the opposite byte order of the machine this script is running on.
@@ -162,7 +162,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))

# Check tensor count and kv count
- temp_counts = self._get(offs, np.uint64, 2)
+ temp_counts = self._get(offs, np.dtype(np.uint64), 2)
offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
tensor_count, kv_count = temp_counts
@@ -193,7 +193,7 @@ def get_tensor(self, idx: int) -> ReaderTensor:

def _get(
self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
- ) -> npt.NDArray[Any]:
+ ) -> np.ndarray:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
end_offs = offset + itemsize * count
@@ -212,8 +212,8 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
- slen = self._get(offset, np.uint64)
- return slen, self._get(offset + 8, np.uint8, slen[0])
+ slen = self._get(offset, np.dtype(np.uint64))
+ return slen, self._get(offset + 8, np.dtype(np.uint8), slen[0].item())

def _get_field_parts(
self, orig_offs: int, raw_type: int,
@@ -230,19 +230,19 @@ def _get_field_parts(
# Check if it's a simple scalar type.
nptype = self.gguf_scalar_to_np.get(gtype)
if nptype is not None:
- val = self._get(offs, nptype)
+ val = self._get(offs, np.dtype(nptype))
return int(val.nbytes), [val], [0], types
# Handle arrays.
if gtype == GGUFValueType.ARRAY:
- raw_itype = self._get(offs, np.uint32)
+ raw_itype = self._get(offs, np.dtype(np.uint32))
offs += int(raw_itype.nbytes)
- alen = self._get(offs, np.uint64)
+ alen = self._get(offs, np.dtype(np.uint64))
offs += int(alen.nbytes)
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = []
# FIXME: Handle multi-dimensional arrays properly instead of flattening
for idx in range(alen[0]):
- curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+ curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0].item())
if idx == 0:
types += curr_types
idxs_offs = len(aparts)
@@ -261,19 +261,19 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs += int(name_len.nbytes + name_data.nbytes)

# Get Tensor Dimensions Count
- n_dims = self._get(offs, np.uint32)
+ n_dims = self._get(offs, np.dtype(np.uint32))
offs += int(n_dims.nbytes)

# Get Tensor Dimension Array
- dims = self._get(offs, np.uint64, n_dims[0])
+ dims = self._get(offs, np.dtype(np.uint64), n_dims[0].item())
offs += int(dims.nbytes)

# Get Tensor Encoding Scheme Type
- raw_dtype = self._get(offs, np.uint32)
+ raw_dtype = self._get(offs, np.dtype(np.uint32))
offs += int(raw_dtype.nbytes)

# Get Tensor Offset
- offset_tensor = self._get(offs, np.uint64)
+ offset_tensor = self._get(offs, np.dtype(np.uint64))
offs += int(offset_tensor.nbytes)

return ReaderField(
@@ -288,11 +288,11 @@ def _build_fields(self, offs: int, count: int) -> int:
orig_offs = offs
kv_klen, kv_kdata = self._get_str(offs)
offs += int(kv_klen.nbytes + kv_kdata.nbytes)
- raw_kv_type = self._get(offs, np.uint32)
+ raw_kv_type = self._get(offs, np.dtype(np.uint32))
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
- field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+ field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0].item())
parts += field_parts
self._push_field(ReaderField(
orig_offs,
@@ -331,28 +331,28 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
item_type: npt.DTypeLike
if ggml_type == GGMLQuantizationType.F16:
item_count = n_elems
- item_type = np.float16
+ item_type = np.dtype(np.float16)
elif ggml_type == GGMLQuantizationType.F32:
item_count = n_elems
- item_type = np.float32
+ item_type = np.dtype(np.float32)
elif ggml_type == GGMLQuantizationType.F64:
item_count = n_elems
- item_type = np.float64
+ item_type = np.dtype(np.float64)
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
- item_type = np.int8
+ item_type = np.dtype(np.int8)
elif ggml_type == GGMLQuantizationType.I16:
item_count = n_elems
- item_type = np.int16
+ item_type = np.dtype(np.int16)
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
- item_type = np.int32
+ item_type = np.dtype(np.int32)
elif ggml_type == GGMLQuantizationType.I64:
item_count = n_elems
- item_type = np.int64
+ item_type = np.dtype(np.int64)
else:
item_count = n_bytes
- item_type = np.uint8
+ item_type = np.dtype(np.uint8)
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
tensors.append(ReaderTensor(
name = tensor_name,
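Two patterns repeat throughout this file: `_get` now receives a concrete `np.dtype(...)` instance rather than the scalar type class, and NumPy integer scalars are converted with `.item()` before being used where a plain Python `int` is expected. A small sketch of the distinction, assuming NumPy 2.x type stubs (the array contents are arbitrary):

```python
import numpy as np

# np.uint32 is a *type class*; np.dtype(np.uint32) is a dtype *instance*.
# Both work at runtime, but the instance resolves to a precise
# np.dtype[np.uint32] under NumPy 2.x stubs, which keeps pyright happy.
dt = np.dtype(np.uint32)
counts = np.array([7], dtype=dt)

# counts[0] is an np.uint32 scalar, not an int; .item() yields the plain
# Python integer that int-annotated parameters (e.g. a count) expect.
count: int = counts[0].item()
assert count == 7 and isinstance(count, int)
```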
8 changes: 4 additions & 4 deletions gguf-py/gguf/quants.py
@@ -93,11 +93,11 @@ def __init_subclass__(cls, qtype: GGMLQuantizationType) -> None:
cls.block_size, cls.type_size = GGML_QUANT_SIZES[qtype]
cls.__quantize_lazy = LazyNumpyTensor._wrap_fn(
cls.__quantize_array,
- meta_noop=(np.uint8, cls.__shape_to_bytes)
+ meta_noop=(np.dtype(np.uint8), cls.__shape_to_bytes)
)
cls.__dequantize_lazy = LazyNumpyTensor._wrap_fn(
cls.__dequantize_array,
- meta_noop=(np.float32, cls.__shape_from_bytes)
+ meta_noop=(np.dtype(np.float32), cls.__shape_from_bytes)
)
assert qtype not in _type_traits
_type_traits[qtype] = cls
@@ -165,12 +165,12 @@ def __shape_from_bytes(cls, shape: Sequence[int]):

@classmethod
def __quantize_array(cls, array: np.ndarray) -> np.ndarray:
- return _apply_over_grouped_rows(cls.quantize_rows, arr=array, otype=np.uint8, oshape=cls.__shape_to_bytes(array.shape))
+ return _apply_over_grouped_rows(cls.quantize_rows, arr=array, otype=np.dtype(np.uint8), oshape=cls.__shape_to_bytes(array.shape))

@classmethod
def __dequantize_array(cls, array: np.ndarray) -> np.ndarray:
cls.init_grid()
- return _apply_over_grouped_rows(cls.dequantize_rows, arr=array, otype=np.float32, oshape=cls.__shape_from_bytes(array.shape))
+ return _apply_over_grouped_rows(cls.dequantize_rows, arr=array, otype=np.dtype(np.float32), oshape=cls.__shape_from_bytes(array.shape))

@classmethod
def __quantize_lazy(cls, lazy_tensor: LazyNumpyTensor, /) -> Any:
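The `otype=` arguments follow the same shift to dtype instances. A toy stand-in for the row-wise helper (a hypothetical `apply_over_rows`, not gguf-py's actual `_apply_over_grouped_rows`) showing how a concrete `np.dtype` flows into output allocation:

```python
import numpy as np
from typing import Callable

def apply_over_rows(fn: Callable[[np.ndarray], np.ndarray], arr: np.ndarray,
                    otype: np.dtype, oshape: tuple[int, ...]) -> np.ndarray:
    # The output buffer is allocated straight from the dtype instance.
    out = np.empty(oshape, dtype=otype)
    for i, row in enumerate(arr):
        out[i] = fn(row)
    return out

halved = apply_over_rows(lambda r: r * 0.5, np.ones((4, 8), dtype=np.float32),
                         otype=np.dtype(np.float32), oshape=(4, 8))
assert halved.dtype == np.float32 and float(halved[0, 0]) == 0.5
```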
2 changes: 1 addition & 1 deletion gguf-py/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.8"
- numpy = ">=1.17"
+ numpy = ">=2.1"
tqdm = ">=4.27"
pyyaml = ">=5.1"
sentencepiece = ">=0.1.98,<=0.2.0"
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -15,8 +15,8 @@
]

[tool.poetry.dependencies]
- python = ">=3.9"
- numpy = "^1.25.0"
+ python = ">=3.10,<3.14"
+ numpy = "^2.1"
sentencepiece = ">=0.1.98,<=0.2.0"
transformers = ">=4.35.2,<5.0.0"
protobuf = ">=4.21.0,<5.0.0"
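Poetry's caret constraint `^2.1` means ">=2.1,<3.0". A quick check of what that range admits, expressed with the `packaging` library (an assumption for illustration; Poetry resolves constraints internally):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

caret_2_1 = SpecifierSet(">=2.1,<3.0")  # PEP 440 equivalent of numpy = "^2.1"
assert Version("2.1.0") in caret_2_1
assert Version("2.3.2") in caret_2_1
assert Version("3.0.0") not in caret_2_1
```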
8 changes: 6 additions & 2 deletions pyrightconfig.json
@@ -1,7 +1,8 @@
{
"extraPaths": ["gguf-py"],
"pythonVersion": "3.9",
"pythonVersion": "3.10",
"pythonPlatform": "All",
"reportInvalidTypeForm": false,
"reportUnusedImport": "warning",
"reportDuplicateImport": "error",
"reportDeprecated": "warning",
@@ -11,12 +12,15 @@
{
// TODO: make this version override work correctly
"root": "gguf-py",
"pythonVersion": "3.8",
"pythonVersion": "3.10",
},
{
// uses match expressions in steps.py
"root": "tools/server/tests",
"pythonVersion": "3.10",
},
],
"exclude": [
"tools/mtmd/legacy-models",
]
}
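The `pythonVersion` floors track real syntax requirements: as the config comment notes, tools/server/tests uses match expressions, which only parse on Python 3.10+ (PEP 634). A minimal illustration of the construct forcing the bump:

```python
# match statements are a 3.10 feature; pyright flags them under older
# pythonVersion settings, hence the per-root overrides above.
def kind(value: object) -> str:
    match value:
        case int() | float():
            return "number"
        case [first, *rest]:
            return f"sequence starting with {first!r} ({len(rest)} more)"
        case _:
            return "other"

assert kind(3) == "number"
assert kind([1, 2, 3]) == "sequence starting with 1 (2 more)"
```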
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
- torch~=2.2.1
+ torch>=2.5.1
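The old `torch~=2.2.1` pin is a compatible-release specifier, i.e. ">=2.2.1, ==2.2.*", so it could never resolve a 2.5.x wheel; relaxing to `>=2.5.1` lifts that ceiling. A minimal check of the semantics with the `packaging` library (pip vendors the same library):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

assert Version("2.2.9") in SpecifierSet("~=2.2.1")      # same minor: allowed
assert Version("2.5.1") not in SpecifierSet("~=2.2.1")  # newer minor: blocked
assert Version("2.5.1") in SpecifierSet(">=2.5.1")      # the relaxed floor
```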
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf_update.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
- torch~=2.2.1
+ torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_legacy_llama.txt
@@ -1,4 +1,4 @@
- numpy~=1.26.4
+ numpy>=2.1
sentencepiece~=0.2.0
transformers>=4.45.1,<5.0.0
gguf>=0.1.0
2 changes: 1 addition & 1 deletion requirements/requirements-gguf_editor_gui.txt
@@ -1,3 +1,3 @@
- numpy~=1.26.4
+ numpy>=2.1
PySide6~=6.9.0
gguf>=0.16.0
4 changes: 2 additions & 2 deletions requirements/requirements-tool_bench.txt
@@ -2,11 +2,11 @@ aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
matplotlib~=3.10.0
- numpy~=1.26.4
+ numpy>=2.1
openai~=1.55.3
pandas~=2.2.3
prometheus-client~=0.20.0
- requests~=2.32.3
+ requests>=2.28.1
wget~=3.2
typer~=0.15.1
seaborn~=0.13.2
4 changes: 2 additions & 2 deletions tools/mtmd/requirements.txt
@@ -1,5 +1,5 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0
- torch~=2.2.1
- torchvision~=0.17.1
+ torch>=2.5
+ torchvision>=0.20.1
4 changes: 2 additions & 2 deletions tools/server/tests/requirements.txt
@@ -1,8 +1,8 @@
aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
- numpy~=1.26.4
+ numpy>=2.1
openai~=1.55.3
prometheus-client~=0.20.0
- requests~=2.32.3
+ requests>=2.28.1
wget~=3.2