Skip to content

Commit e1ba793

Browse files
committed
feat: Parse the preprocessor config's longest side and add it to the mmproj hparams
Branch: GraniteDocling
Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 64cef62 commit e1ba793

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1328,6 +1328,7 @@ def __init__(self, *args, **kwargs):
13281328
self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)
13291329

13301330
# load preprocessor config
1331+
self.preprocessor_config = {}
13311332
if not self.is_mistral_format:
13321333
with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
13331334
self.preprocessor_config = json.load(f)
@@ -1350,7 +1351,8 @@ def set_gguf_parameters(self):
13501351
self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
13511352

13521353
# vision config
1353-
self.gguf_writer.add_vision_image_size(self.find_vparam(["image_size"]))
1354+
self.image_size = self.find_vparam(["image_size"])
1355+
self.gguf_writer.add_vision_image_size(self.image_size)
13541356
self.gguf_writer.add_vision_patch_size(self.find_vparam(["patch_size"]))
13551357
self.gguf_writer.add_vision_embedding_length(self.find_vparam(["hidden_size"]))
13561358
self.gguf_writer.add_vision_feed_forward_length(self.find_vparam(["intermediate_size"]))
@@ -2381,6 +2383,10 @@ def set_gguf_parameters(self):
23812383
self.gguf_writer.add_vision_projector_scale_factor(self.global_config.get("scale_factor", 2))
23822384
self.gguf_writer.add_vision_use_gelu(True)
23832385

2386+
# Add the preprocessor longest edge size
2387+
preproc_image_size = self.preprocessor_config.get("size", {}).get("longest_edge", self.image_size)
2388+
self.gguf_writer.add_vision_preproc_image_size(preproc_image_size)
2389+
23842390
def tensor_force_quant(self, name, new_name, bid, n_dims):
23852391
if ".embeddings." in name:
23862392
return gguf.GGMLQuantizationType.F32

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ class Clip:
258258

259259
class ClipVision:
260260
IMAGE_SIZE = "clip.vision.image_size"
261+
PREPROC_IMAGE_SIZE = "clip.vision.preproc_image_size"
261262
PATCH_SIZE = "clip.vision.patch_size"
262263
EMBEDDING_LENGTH = "clip.vision.embedding_length"
263264
FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,9 @@ def add_vision_attention_layernorm_eps(self, value: float) -> None:
10281028
def add_vision_image_size(self, value: int) -> None:
10291029
self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
10301030

1031+
def add_vision_preproc_image_size(self, value: int) -> None:
1032+
self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
1033+
10311034
def add_vision_image_mean(self, values: Sequence[float]) -> None:
10321035
self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
10331036

0 commit comments

Comments
 (0)