2 changes: 1 addition & 1 deletion examples/convert_legacy_llama.py
@@ -1308,7 +1308,7 @@ def do_dump_model(model_plus: ModelPlus) -> None:

def main(args_in: list[str] | None = None) -> None:
output_choices = ["f32", "f16"]
- if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+ if np.uint32(1) == np.uint32(1).view(np.dtype(np.uint32).newbyteorder("<")):
# We currently only support Q8_0 output on little endian systems.
output_choices.append("q8_0")
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
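NumPy 2.0 removed `.newbyteorder()` from arrays and scalars; only `np.dtype.newbyteorder()` survives, which is why the check is rewritten as a view through a byte-order-qualified dtype. A minimal standalone sketch of the same little-endian probe (equivalent in spirit to checking `sys.byteorder == "little"`):

```python
import numpy as np

# Reinterpret a native-order scalar through an explicitly little-endian
# dtype; the comparison only holds on little-endian hosts.
native_one = np.uint32(1)
le_uint32 = np.dtype(np.uint32).newbyteorder("<")
is_little_endian = bool(native_one == native_one.view(le_uint32))
print("little-endian host" if is_little_endian else "big-endian host")
```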
50 changes: 25 additions & 25 deletions gguf-py/gguf/gguf_reader.py
@@ -1,4 +1,3 @@
- #
# GGUF file reading/modification support. For API usage information,
# please see the files scripts/ for some fairly simple examples.
#
@@ -15,6 +14,7 @@

from .quants import quant_shape_to_byte_shape


if __name__ == "__main__":
from pathlib import Path

@@ -134,12 +134,12 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
offs = 0

# Check for GGUF magic
- if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+ if self._get(offs, np.dtype(np.uint32), override_order = '<')[0] != GGUF_MAGIC:
raise ValueError('GGUF magic invalid')
offs += 4

# Check GGUF version
- temp_version = self._get(offs, np.uint32)
+ temp_version = self._get(offs, np.dtype(np.uint32))
if temp_version[0] & 65535 == 0:
# If we get 0 here that means it's (probably) a GGUF file created for
# the opposite byte order of the machine this script is running on.
@@ -162,7 +162,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] =
offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))

# Check tensor count and kv count
- temp_counts = self._get(offs, np.uint64, 2)
+ temp_counts = self._get(offs, np.dtype(np.uint64), 2)
offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
tensor_count, kv_count = temp_counts
@@ -193,7 +193,7 @@ def get_tensor(self, idx: int) -> ReaderTensor:

def _get(
self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
- ) -> npt.NDArray[Any]:
+ ) -> np.ndarray:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
end_offs = offset + itemsize * count
@@ -212,8 +212,8 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
- slen = self._get(offset, np.uint64)
- return slen, self._get(offset + 8, np.uint8, slen[0])
+ slen = self._get(offset, np.dtype(np.uint64))
+ return slen, self._get(offset + 8, np.dtype(np.uint8), slen[0].item())

def _get_field_parts(
self, orig_offs: int, raw_type: int,
@@ -230,19 +230,19 @@ def _get_field_parts(
# Check if it's a simple scalar type.
nptype = self.gguf_scalar_to_np.get(gtype)
if nptype is not None:
- val = self._get(offs, nptype)
+ val = self._get(offs, np.dtype(nptype))
return int(val.nbytes), [val], [0], types
# Handle arrays.
if gtype == GGUFValueType.ARRAY:
- raw_itype = self._get(offs, np.uint32)
+ raw_itype = self._get(offs, np.dtype(np.uint32))
offs += int(raw_itype.nbytes)
- alen = self._get(offs, np.uint64)
+ alen = self._get(offs, np.dtype(np.uint64))
offs += int(alen.nbytes)
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = []
# FIXME: Handle multi-dimensional arrays properly instead of flattening
for idx in range(alen[0]):
- curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+ curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0].item())
if idx == 0:
types += curr_types
idxs_offs = len(aparts)
@@ -261,19 +261,19 @@ def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
offs += int(name_len.nbytes + name_data.nbytes)

# Get Tensor Dimensions Count
- n_dims = self._get(offs, np.uint32)
+ n_dims = self._get(offs, np.dtype(np.uint32))
offs += int(n_dims.nbytes)

# Get Tensor Dimension Array
- dims = self._get(offs, np.uint64, n_dims[0])
+ dims = self._get(offs, np.dtype(np.uint64), n_dims[0].item())
offs += int(dims.nbytes)

# Get Tensor Encoding Scheme Type
- raw_dtype = self._get(offs, np.uint32)
+ raw_dtype = self._get(offs, np.dtype(np.uint32))
offs += int(raw_dtype.nbytes)

# Get Tensor Offset
- offset_tensor = self._get(offs, np.uint64)
+ offset_tensor = self._get(offs, np.dtype(np.uint64))
offs += int(offset_tensor.nbytes)

return ReaderField(
@@ -288,11 +288,11 @@ def _build_fields(self, offs: int, count: int) -> int:
orig_offs = offs
kv_klen, kv_kdata = self._get_str(offs)
offs += int(kv_klen.nbytes + kv_kdata.nbytes)
- raw_kv_type = self._get(offs, np.uint32)
+ raw_kv_type = self._get(offs, np.dtype(np.uint32))
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
- field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+ field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0].item())
parts += field_parts
self._push_field(ReaderField(
orig_offs,
@@ -331,28 +331,28 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
item_type: npt.DTypeLike
if ggml_type == GGMLQuantizationType.F16:
item_count = n_elems
- item_type = np.float16
+ item_type = np.dtype(np.float16)
elif ggml_type == GGMLQuantizationType.F32:
item_count = n_elems
- item_type = np.float32
+ item_type = np.dtype(np.float32)
elif ggml_type == GGMLQuantizationType.F64:
item_count = n_elems
- item_type = np.float64
+ item_type = np.dtype(np.float64)
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
- item_type = np.int8
+ item_type = np.dtype(np.int8)
elif ggml_type == GGMLQuantizationType.I16:
item_count = n_elems
- item_type = np.int16
+ item_type = np.dtype(np.int16)
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
- item_type = np.int32
+ item_type = np.dtype(np.int32)
elif ggml_type == GGMLQuantizationType.I64:
item_count = n_elems
- item_type = np.int64
+ item_type = np.dtype(np.int64)
else:
item_count = n_bytes
- item_type = np.uint8
+ item_type = np.dtype(np.uint8)
np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
tensors.append(ReaderTensor(
name = tensor_name,
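Two patterns repeat throughout this file: `_get` now receives a concrete `np.dtype(...)` instance rather than the scalar type class, and NumPy integer scalars are converted with `.item()` before being used where a plain Python `int` is expected. A small sketch of the distinction, assuming NumPy 2.x type stubs (the array contents are arbitrary):

```python
import numpy as np

# np.uint32 is a *type class*; np.dtype(np.uint32) is a dtype *instance*.
# Both work at runtime, but the instance resolves to a precise
# np.dtype[np.uint32] under NumPy 2.x stubs, which keeps pyright happy.
dt = np.dtype(np.uint32)
counts = np.array([7], dtype=dt)

# counts[0] is an np.uint32 scalar, not an int; .item() yields the plain
# Python integer that int-annotated parameters (e.g. a count) expect.
count: int = counts[0].item()
assert count == 7 and isinstance(count, int)
```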
8 changes: 4 additions & 4 deletions gguf-py/gguf/quants.py
@@ -93,11 +93,11 @@ def __init_subclass__(cls, qtype: GGMLQuantizationType) -> None:
cls.block_size, cls.type_size = GGML_QUANT_SIZES[qtype]
cls.__quantize_lazy = LazyNumpyTensor._wrap_fn(
cls.__quantize_array,
- meta_noop=(np.uint8, cls.__shape_to_bytes)
+ meta_noop=(np.dtype(np.uint8), cls.__shape_to_bytes)
)
cls.__dequantize_lazy = LazyNumpyTensor._wrap_fn(
cls.__dequantize_array,
- meta_noop=(np.float32, cls.__shape_from_bytes)
+ meta_noop=(np.dtype(np.float32), cls.__shape_from_bytes)
)
assert qtype not in _type_traits
_type_traits[qtype] = cls
@@ -165,12 +165,12 @@ def __shape_from_bytes(cls, shape: Sequence[int]):

@classmethod
def __quantize_array(cls, array: np.ndarray) -> np.ndarray:
- return _apply_over_grouped_rows(cls.quantize_rows, arr=array, otype=np.uint8, oshape=cls.__shape_to_bytes(array.shape))
+ return _apply_over_grouped_rows(cls.quantize_rows, arr=array, otype=np.dtype(np.uint8), oshape=cls.__shape_to_bytes(array.shape))

@classmethod
def __dequantize_array(cls, array: np.ndarray) -> np.ndarray:
cls.init_grid()
- return _apply_over_grouped_rows(cls.dequantize_rows, arr=array, otype=np.float32, oshape=cls.__shape_from_bytes(array.shape))
+ return _apply_over_grouped_rows(cls.dequantize_rows, arr=array, otype=np.dtype(np.float32), oshape=cls.__shape_from_bytes(array.shape))

@classmethod
def __quantize_lazy(cls, lazy_tensor: LazyNumpyTensor, /) -> Any:
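The `otype=` arguments follow the same shift to dtype instances. A toy stand-in for the row-wise helper (a hypothetical `apply_over_rows`, not gguf-py's actual `_apply_over_grouped_rows`) showing how a concrete `np.dtype` flows into output allocation:

```python
import numpy as np
from typing import Callable

def apply_over_rows(fn: Callable[[np.ndarray], np.ndarray], arr: np.ndarray,
                    otype: np.dtype, oshape: tuple[int, ...]) -> np.ndarray:
    # The output buffer is allocated straight from the dtype instance.
    out = np.empty(oshape, dtype=otype)
    for i, row in enumerate(arr):
        out[i] = fn(row)
    return out

halved = apply_over_rows(lambda r: r * 0.5, np.ones((4, 8), dtype=np.float32),
                         otype=np.dtype(np.float32), oshape=(4, 8))
assert halved.dtype == np.float32 and float(halved[0, 0]) == 0.5
```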
2 changes: 1 addition & 1 deletion gguf-py/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.8"
- numpy = ">=1.17"
+ numpy = ">=2.1"
tqdm = ">=4.27"
pyyaml = ">=5.1"
sentencepiece = ">=0.1.98,<=0.2.0"
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -15,8 +15,8 @@
]

[tool.poetry.dependencies]
- python = ">=3.9"
- numpy = "^1.25.0"
+ python = ">=3.10,<3.14"
+ numpy = "^2.1"
sentencepiece = ">=0.1.98,<=0.2.0"
transformers = ">=4.35.2,<5.0.0"
protobuf = ">=4.21.0,<5.0.0"
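Poetry's caret constraint `^2.1` means ">=2.1,<3.0". A quick check of what that range admits, expressed with the `packaging` library (an assumption for illustration; Poetry resolves constraints internally):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

caret_2_1 = SpecifierSet(">=2.1,<3.0")  # PEP 440 equivalent of numpy = "^2.1"
assert Version("2.1.0") in caret_2_1
assert Version("2.3.2") in caret_2_1
assert Version("3.0.0") not in caret_2_1
```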
8 changes: 6 additions & 2 deletions pyrightconfig.json
@@ -1,7 +1,8 @@
{
"extraPaths": ["gguf-py"],
"pythonVersion": "3.9",
"pythonVersion": "3.10",
"pythonPlatform": "All",
"reportInvalidTypeForm": false,
"reportUnusedImport": "warning",
"reportDuplicateImport": "error",
"reportDeprecated": "warning",
@@ -11,12 +12,15 @@
{
// TODO: make this version override work correctly
"root": "gguf-py",
"pythonVersion": "3.8",
"pythonVersion": "3.10",
},
{
// uses match expressions in steps.py
"root": "tools/server/tests",
"pythonVersion": "3.10",
},
],
"exclude": [
"tools/mtmd/legacy-models",
]
}
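The `pythonVersion` floors track real syntax requirements: as the config comment notes, tools/server/tests uses match expressions, which only parse on Python 3.10+ (PEP 634). A minimal illustration of the construct forcing the bump:

```python
# match statements are a 3.10 feature; pyright flags them under older
# pythonVersion settings, hence the per-root overrides above.
def kind(value: object) -> str:
    match value:
        case int() | float():
            return "number"
        case [first, *rest]:
            return f"sequence starting with {first!r} ({len(rest)} more)"
        case _:
            return "other"

assert kind(3) == "number"
assert kind([1, 2, 3]) == "sequence starting with 1 (2 more)"
```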
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
- torch~=2.2.1
+ torch>=2.5.1
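The old `torch~=2.2.1` pin is a compatible-release specifier, i.e. ">=2.2.1, ==2.2.*", so it could never resolve a 2.5.x wheel; relaxing to `>=2.5.1` lifts that ceiling. A minimal check of the semantics with the `packaging` library (pip vendors the same library):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

assert Version("2.2.9") in SpecifierSet("~=2.2.1")      # same minor: allowed
assert Version("2.5.1") not in SpecifierSet("~=2.2.1")  # newer minor: blocked
assert Version("2.5.1") in SpecifierSet(">=2.5.1")      # the relaxed floor
```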
2 changes: 1 addition & 1 deletion requirements/requirements-convert_hf_to_gguf_update.txt
@@ -1,3 +1,3 @@
-r ./requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
- torch~=2.2.1
+ torch>=2.5.1
2 changes: 1 addition & 1 deletion requirements/requirements-convert_legacy_llama.txt
@@ -1,4 +1,4 @@
- numpy~=1.26.4
+ numpy>=2.1
sentencepiece~=0.2.0
transformers>=4.45.1,<5.0.0
gguf>=0.1.0
2 changes: 1 addition & 1 deletion requirements/requirements-gguf_editor_gui.txt
@@ -1,3 +1,3 @@
- numpy~=1.26.4
+ numpy>=2.1
PySide6~=6.9.0
gguf>=0.16.0
4 changes: 2 additions & 2 deletions requirements/requirements-tool_bench.txt
@@ -2,11 +2,11 @@ aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
matplotlib~=3.10.0
- numpy~=1.26.4
+ numpy>=2.1
openai~=1.55.3
pandas~=2.2.3
prometheus-client~=0.20.0
- requests~=2.32.3
+ requests>=2.28.1
wget~=3.2
typer~=0.15.1
seaborn~=0.13.2
4 changes: 2 additions & 2 deletions tools/mtmd/requirements.txt
@@ -1,5 +1,5 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0
- torch~=2.2.1
- torchvision~=0.17.1
+ torch>=2.5
+ torchvision>=0.20.1
4 changes: 2 additions & 2 deletions tools/server/tests/requirements.txt
@@ -1,8 +1,8 @@
aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
- numpy~=1.26.4
+ numpy>=2.1
openai~=1.55.3
prometheus-client~=0.20.0
- requests~=2.32.3
+ requests>=2.28.1
wget~=3.2