Skip to content

Commit 3d958f3

Browse files
Merge pull request #274 from menloresearch/update-dev-from-master-2025-10-06-00-34
Sync master with upstream release b6692
2 parents 8cf98e9 + ca71fb9 commit 3d958f3

File tree

13 files changed: +175 -98 lines changed

13 files changed: +175 -98 lines changed

convert_hf_to_gguf.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -891,6 +891,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
891891
if chkhsh == "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206":
892892
# ref: https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base
893893
res = "llada-moe"
894+
if chkhsh == "53e325976a6e142379c19b09afcae354f2f496f147afa8f9e189a33fe4e3024e":
895+
# ref: https://huggingface.co/ibm-granite/granite-docling-258M
896+
res = "granite-docling"
894897

895898
if res is None:
896899
logger.warning("\n")
@@ -1325,6 +1328,7 @@ def __init__(self, *args, **kwargs):
13251328
self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)
13261329

13271330
# load preprocessor config
1331+
self.preprocessor_config = {}
13281332
if not self.is_mistral_format:
13291333
with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
13301334
self.preprocessor_config = json.load(f)
@@ -1347,7 +1351,8 @@ def set_gguf_parameters(self):
13471351
self.gguf_writer.add_vision_projection_dim(self.n_embd_text)
13481352

13491353
# vision config
1350-
self.gguf_writer.add_vision_image_size(self.find_vparam(["image_size"]))
1354+
self.image_size = self.find_vparam(["image_size"])
1355+
self.gguf_writer.add_vision_image_size(self.image_size)
13511356
self.gguf_writer.add_vision_patch_size(self.find_vparam(["patch_size"]))
13521357
self.gguf_writer.add_vision_embedding_length(self.find_vparam(["hidden_size"]))
13531358
self.gguf_writer.add_vision_feed_forward_length(self.find_vparam(["intermediate_size"]))
@@ -2378,6 +2383,10 @@ def set_gguf_parameters(self):
23782383
self.gguf_writer.add_vision_projector_scale_factor(self.global_config.get("scale_factor", 2))
23792384
self.gguf_writer.add_vision_use_gelu(True)
23802385

2386+
# Add the preprocessor longest edge size
2387+
preproc_image_size = self.preprocessor_config.get("size", {}).get("longest_edge", self.image_size)
2388+
self.gguf_writer.add_vision_preproc_image_size(preproc_image_size)
2389+
23812390
def tensor_force_quant(self, name, new_name, bid, n_dims):
23822391
if ".embeddings." in name:
23832392
return gguf.GGMLQuantizationType.F32

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,7 @@ class TOKENIZER_TYPE(IntEnum):
140140
{"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
141141
{"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", },
142142
{"name": "llada-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base", },
143+
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
143144
]
144145

145146
# some models are known to be broken upstream, so we will skip them as exceptions

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,7 @@ static void ggml_backend_webgpu_build_and_enqueue(webgpu_context &
424424
ctx->staged_param_bufs.push_back(params_bufs);
425425
if (ctx->staged_command_bufs.size() == WEBGPU_COMMAND_SUBMIT_BATCH_SIZE) {
426426
ggml_backend_webgpu_submit_queue(ctx);
427+
ggml_backend_webgpu_wait_on_submission(ctx);
427428
}
428429
}
429430
}
@@ -1060,6 +1061,9 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node) {
10601061
case GGML_OP_SCALE:
10611062
ggml_webgpu_scale(ctx, src0, node);
10621063
break;
1064+
case GGML_OP_SOFT_MAX:
1065+
ggml_webgpu_soft_max(ctx, src0, src1, src2, node);
1066+
break;
10631067
default:
10641068
return false;
10651069
}
@@ -1806,6 +1810,9 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
18061810
case GGML_OP_SCALE:
18071811
supports_op = op->type == GGML_TYPE_F32;
18081812
break;
1813+
case GGML_OP_SOFT_MAX:
1814+
supports_op = op->type == GGML_TYPE_F32;
1815+
break;
18091816
default:
18101817
break;
18111818
}
@@ -1949,6 +1956,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
19491956
ggml_webgpu_init_rope_pipeline(ctx);
19501957
ggml_webgpu_init_glu_pipeline(ctx);
19511958
ggml_webgpu_init_scale_pipeline(ctx);
1959+
ggml_webgpu_init_soft_max_pipeline(ctx);
19521960

19531961
#ifdef GGML_WEBGPU_DEBUG
19541962
// Initialize debug buffers

ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>,
8484
let i2 = i / params.ne1;
8585
let i1 = i % params.ne1;
8686
let i_src_row = params.offset_src + i3 * params.stride_src3 + i2 * params.stride_src2 + i1 * params.stride_src1;
87-
let i_dst_row = params.offset_src + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1;
87+
let i_dst_row = params.offset_dst + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1;
8888

8989
let elems = (params.ne0 + wg_size - 1) / wg_size;
9090

ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>,
300300
workgroupBarrier();
301301
}
302302
let row_max = scratch[0];
303+
workgroupBarrier();
303304

304305
var sum = 0.0f;
305306
col = lid.x;

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,7 @@ class Clip:
261261

262262
class ClipVision:
263263
IMAGE_SIZE = "clip.vision.image_size"
264+
PREPROC_IMAGE_SIZE = "clip.vision.preproc_image_size"
264265
PATCH_SIZE = "clip.vision.patch_size"
265266
EMBEDDING_LENGTH = "clip.vision.embedding_length"
266267
FEED_FORWARD_LENGTH = "clip.vision.feed_forward_length"

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,6 +1037,9 @@ def add_vision_attention_layernorm_eps(self, value: float) -> None:
10371037
def add_vision_image_size(self, value: int) -> None:
10381038
self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
10391039

1040+
def add_vision_preproc_image_size(self, value: int) -> None:
1041+
self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
1042+
10401043
def add_vision_image_mean(self, values: Sequence[float]) -> None:
10411044
self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
10421045

src/llama-vocab.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
347347
case LLAMA_VOCAB_PRE_TYPE_OLMO:
348348
case LLAMA_VOCAB_PRE_TYPE_JAIS:
349349
case LLAMA_VOCAB_PRE_TYPE_TRILLION:
350+
case LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING:
350351
regex_exprs = {
351352
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
352353
};
@@ -1961,6 +1962,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
19611962
tokenizer_pre == "trillion") {
19621963
pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
19631964
clean_spaces = false;
1965+
} else if (
1966+
tokenizer_pre == "granite-docling") {
1967+
pre_type = LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING;
1968+
clean_spaces = false;
19641969
} else if (
19651970
tokenizer_pre == "bailingmoe" ||
19661971
tokenizer_pre == "llada-moe") {

src/llama-vocab.h

Lines changed: 41 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -8,46 +8,47 @@
88

99
// pre-tokenization types
1010
enum llama_vocab_pre_type {
11-
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
12-
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
13-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
14-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
15-
LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
16-
LLAMA_VOCAB_PRE_TYPE_MPT = 5,
17-
LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
18-
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
19-
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
20-
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
21-
LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
22-
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
23-
LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
24-
LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
25-
LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
26-
LLAMA_VOCAB_PRE_TYPE_PORO = 15,
27-
LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
28-
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
29-
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
30-
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
31-
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
32-
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
33-
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
34-
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
35-
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
36-
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
37-
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
38-
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
39-
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
40-
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
41-
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
42-
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
43-
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
44-
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
45-
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
46-
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
47-
LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
48-
LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37,
49-
LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38,
50-
LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39,
11+
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
12+
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
13+
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
14+
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
15+
LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
16+
LLAMA_VOCAB_PRE_TYPE_MPT = 5,
17+
LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
18+
LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
19+
LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
20+
LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
21+
LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
22+
LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
23+
LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
24+
LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
25+
LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
26+
LLAMA_VOCAB_PRE_TYPE_PORO = 15,
27+
LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
28+
LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
29+
LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
30+
LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
31+
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
32+
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
33+
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
34+
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
35+
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
36+
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
37+
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
38+
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
39+
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
40+
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
41+
LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
42+
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
43+
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
44+
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
45+
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
46+
LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
47+
LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
48+
LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37,
49+
LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38,
50+
LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39,
51+
LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,
5152
};
5253

5354
struct LLM_KV;

tools/mtmd/clip-impl.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131

3232
// vision-specific
3333
#define KEY_IMAGE_SIZE "clip.vision.image_size"
34+
#define KEY_PREPROC_IMAGE_SIZE "clip.vision.preproc_image_size"
3435
#define KEY_PATCH_SIZE "clip.vision.patch_size"
3536
#define KEY_IMAGE_MEAN "clip.vision.image_mean"
3637
#define KEY_IMAGE_STD "clip.vision.image_std"

0 commit comments

Comments
 (0)