Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,6 @@ charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset

[tools/mtmd/vendor/miniaudio.h]
[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
13 changes: 5 additions & 8 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,20 @@ add_library(${TARGET} STATIC
arg.cpp
arg.h
base64.hpp
chat.cpp
chat.h
chat-parser.cpp
chat-parser.h
chat.cpp
chat.h
common.cpp
common.h
console.cpp
console.h
json-schema-to-grammar.cpp
json.hpp
json-partial.h
json-partial.cpp
json-partial.h
json-schema-to-grammar.cpp
llguidance.cpp
log.cpp
log.h
minja/chat-template.hpp
minja/minja.hpp
ngram-cache.cpp
ngram-cache.h
regex-partial.cpp
Expand Down Expand Up @@ -147,7 +144,7 @@ if (LLAMA_LLGUIDANCE)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()

target_include_directories(${TARGET} PUBLIC .)
target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

Expand Down
10 changes: 6 additions & 4 deletions common/arg.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "gguf.h" // for reading GGUF splits
#include "arg.h"

#include "chat.h"
#include "common.h"
#include "gguf.h" // for reading GGUF splits
#include "json-schema-to-grammar.h"
#include "log.h"
#include "sampling.h"
#include "chat.h"

// fix problem with std::min and std::max
#if defined(_WIN32)
Expand All @@ -15,6 +16,9 @@
#include <windows.h>
#endif

#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>

#include <algorithm>
#include <climits>
#include <cstdarg>
Expand All @@ -34,8 +38,6 @@
#include <future>
#endif

#include "json-schema-to-grammar.h"

using json = nlohmann::ordered_json;

std::initializer_list<enum llama_example> mmproj_examples = {
Expand Down
3 changes: 2 additions & 1 deletion common/chat-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

#include "chat.h"
#include "json-partial.h"
#include "json.hpp"
#include "regex-partial.h"

#include <nlohmann/json.hpp>

#include <optional>
#include <string>
#include <vector>
Expand Down
8 changes: 4 additions & 4 deletions common/chat.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#include "chat.h"
#include "chat-parser.h"
#include "common.h"
#include "json-partial.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "json-partial.h"
#include "minja/chat-template.hpp"
#include "minja/minja.hpp"
#include "regex-partial.h"

#include <minja/chat-template.hpp>
#include <minja/minja.hpp>

#include <cstdio>
#include <exception>
#include <iostream>
Expand All @@ -16,7 +17,6 @@
#include <string>
#include <vector>


static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
auto time = std::chrono::system_clock::to_time_t(now);
auto local_time = *std::localtime(&time);
Expand Down
9 changes: 5 additions & 4 deletions common/json-partial.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#include <json-partial.h>
#include "ggml.h"
#include "json-partial.h"

#include "log.h"
#include <string>

#include <json.hpp>
#include <nlohmann/json.hpp>

#include <string>

using json = nlohmann::ordered_json;

Expand Down
3 changes: 2 additions & 1 deletion common/json-partial.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <json.hpp>

#include <nlohmann/json.hpp>

// Healing marker (empty if the JSON was fully parsed / wasn't healed).
struct common_healing_marker {
Expand Down
3 changes: 2 additions & 1 deletion common/json-schema-to-grammar.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include "json-schema-to-grammar.h"
#include "common.h"

#include <nlohmann/json.hpp>

#include <algorithm>
#include <fstream>
#include <map>
#include <regex>
#include <sstream>
Expand Down
8 changes: 4 additions & 4 deletions common/json-schema-to-grammar.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#pragma once

#include "ggml.h"
// Change JSON_ASSERT from assert() to GGML_ASSERT:
#define JSON_ASSERT GGML_ASSERT
#include "json.hpp"
#include <nlohmann/json_fwd.hpp>

#include <functional>
#include <string>

// Declaration of json_schema_to_grammar.
//   schema     - JSON schema, passed as a const reference to nlohmann::ordered_json.
//   force_gbnf - optional flag, defaults to false.
// Returns a std::string.
// NOTE(review): presumably the returned string is the grammar generated from
// `schema`; the definition is not visible here, so confirm the exact semantics
// of `force_gbnf` against the implementation.
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
bool force_gbnf = false);
Expand Down
57 changes: 42 additions & 15 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,15 +523,15 @@ def set_gguf_parameters(self):
self.gguf_writer.add_context_length(n_ctx)
logger.info(f"gguf: context length = {n_ctx}")

if (n_embd := self.find_hparam(["hidden_size", "n_embd"], optional=True)) is not None:
if (n_embd := self.find_hparam(["hidden_size", "n_embd", "dim"], optional=True)) is not None:
self.gguf_writer.add_embedding_length(n_embd)
logger.info(f"gguf: embedding length = {n_embd}")

if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
if (n_ff := self.find_hparam(["intermediate_size", "n_inner", "hidden_dim"], optional=True)) is not None:
self.gguf_writer.add_feed_forward_length(n_ff)
logger.info(f"gguf: feed forward length = {n_ff}")

if (n_head := self.find_hparam(["num_attention_heads", "n_head"], optional=True)) is not None:
if (n_head := self.find_hparam(["num_attention_heads", "n_head", "n_heads"], optional=True)) is not None:
self.gguf_writer.add_head_count(n_head)
logger.info(f"gguf: head count = {n_head}")

Expand Down Expand Up @@ -674,12 +674,12 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
# ref: https://huggingface.co/tiiuae/falcon-7b
res = "falcon"
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
res = "falcon3"
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
res = "bert-bge"
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
res = "falcon3"
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
res = "bert-bge-large"
Expand Down Expand Up @@ -731,9 +731,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
res = "jina-v2-code"
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
res = "chatglm-bpe"
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
# ref: https://huggingface.co/LumiOpen/Viking-7B
res = "viking"
Expand Down Expand Up @@ -764,9 +761,6 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
# ref: https://huggingface.co/facebook/chameleon-7b
res = "chameleon"
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
res = "minerva-7b"
if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
# ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
res = "roberta-bpe"
Expand Down Expand Up @@ -797,15 +791,24 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
res = "llama4"
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
res = "glm4"
if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
# ref: https://huggingface.co/mistral-community/pixtral-12b
res = "pixtral"
if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
# ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
res = "seed-coder"
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
res = "chatglm-bpe"
if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516":
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
res = "chatglm-bpe"
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
res = "glm4"
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
res = "minerva-7b"

if res is None:
logger.warning("\n")
Expand Down Expand Up @@ -1044,6 +1047,10 @@ def _set_vocab_rwkv_world(self):
special_vocab.chat_template = "rwkv-world"
# hack: Add '\n\n' as the EOT token to make it chat normally
special_vocab._set_special_token("eot", 261)
# hack: Override these as they have already been set (incorrectly)
special_vocab.special_token_ids["bos"] = 0
special_vocab.special_token_ids["eos"] = 0

special_vocab.add_to_gguf(self.gguf_writer)

def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int):
Expand Down Expand Up @@ -3907,6 +3914,26 @@ def _xlmroberta_set_vocab(self) -> None:
self.gguf_writer.add_add_eos_token(True)


@ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
class DistilBertModel(BertModel):
    """Converter for DistilBERT checkpoints, registered under the BERT architecture."""

    model_arch = gguf.MODEL_ARCH.BERT

    def set_gguf_parameters(self):
        """Write a fixed layer-norm epsilon of 1e-12, then the inherited parameters."""
        self.gguf_writer.add_layer_norm_eps(1e-12)
        logger.info("gguf: layer norm epsilon = 1e-12")
        super().set_gguf_parameters()

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Normalize a tensor name and delegate to the parent implementation.

        A leading "distilbert." is removed from `name`. Tensors whose
        (normalized) name starts with "vocab_" are dropped by returning an
        empty list; everything else is forwarded to the base class.
        """
        model_prefix = "distilbert."
        if name.startswith(model_prefix):
            # Strip the model prefix so the remaining name is handled by the parent.
            name = name[len(model_prefix):]

        # These layers act as MLM head, so we don't need them
        is_mlm_head = name.startswith("vocab_")
        if is_mlm_head:
            return []

        return super().modify_tensors(data_torch, name, bid)


@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
class RobertaModel(BertModel):
model_arch = gguf.MODEL_ARCH.BERT
Expand Down
Loading
Loading