
Commit 6131e6a: Merge b3568 (2 parents: a29e53c + 8cd1bcf)

21 files changed, +913 -429 lines

Makefile

Lines changed: 8 additions & 14 deletions

@@ -1454,26 +1454,20 @@ libllava.a: examples/llava/llava.cpp \
 	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual

 llama-llava-cli: examples/llava/llava-cli.cpp \
-	examples/llava/clip.h \
-	examples/llava/clip.cpp \
-	examples/llava/llava.h \
 	examples/llava/llava.cpp \
+	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
 	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

 llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
-	examples/llava/clip.h \
-	examples/llava/clip.cpp \
-	examples/llava/llava.h \
 	examples/llava/llava.cpp \
+	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
 	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift

common/common.cpp

Lines changed: 14 additions & 1 deletion

@@ -1777,6 +1777,17 @@ std::string string_get_sortable_timestamp() {
     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }

+void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
+    if (search.empty()) {
+        return; // Avoid infinite loop if 'search' is an empty string
+    }
+    size_t pos = 0;
+    while ((pos = s.find(search, pos)) != std::string::npos) {
+        s.replace(pos, search.length(), replace);
+        pos += replace.length();
+    }
+}
+
 void string_process_escapes(std::string & input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;

@@ -2145,7 +2156,9 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
             tmp.clear();
             tmp.push_back(decoder_start_token_id);
         }
-        llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
+        if (llama_model_has_decoder(model)) {
+            llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
+        }
         llama_kv_cache_clear(lctx);
         llama_synchronize(lctx);
         llama_reset_timings(lctx);
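
Note on the new helper: string_replace_all() rewrites s in place, scanning left to right and stepping past each replacement, so a replacement is never rescanned and expanding patterns (e.g. replacing "a" with "aa") still terminate. A minimal, self-contained sketch of the behavior follows; the function body is copied from the hunk above, while the main() driver is illustrative only and not part of this commit.

#include <iostream>
#include <string>

// Definition copied from the common/common.cpp hunk above.
void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return; // avoid infinite loop if 'search' is an empty string
    }
    size_t pos = 0;
    while ((pos = s.find(search, pos)) != std::string::npos) {
        s.replace(pos, search.length(), replace);
        pos += replace.length(); // step past the replacement so it is never rescanned
    }
}

int main() {
    std::string s = "the cat sat on the mat";
    string_replace_all(s, "at", "og");
    std::cout << s << "\n"; // prints: the cog sog on the mog

    string_replace_all(s, "", "x"); // empty search is rejected up front: no-op
    std::cout << s << "\n"; // unchanged
    return 0;
}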

common/common.h

Lines changed: 2 additions & 0 deletions

@@ -286,6 +286,8 @@ std::vector<std::string> string_split(std::string input, char separator);
 std::string string_strip(const std::string & str);
 std::string string_get_sortable_timestamp();

+void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
+
 template<class T>
 static std::vector<T> string_split(const std::string & str, char delim) {
     std::vector<T> values;

convert_hf_to_gguf.py

Lines changed: 139 additions & 0 deletions

@@ -3324,6 +3324,145 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("T5EncoderModel")
+class T5EncoderModel(Model):
+    model_arch = gguf.MODEL_ARCH.T5ENCODER
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.shared_token_embeddings_found = False
+
+    def set_vocab(self):
+        # to avoid TypeError: Descriptors cannot be created directly
+        # exception when importing sentencepiece_model_pb2
+        os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+        from sentencepiece import SentencePieceProcessor
+        from sentencepiece import sentencepiece_model_pb2 as model
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        # many older models use spiece.model tokenizer model filename
+        if not tokenizer_path.is_file():
+            tokenizer_path = self.dir_model / 'spiece.model'
+
+        if not tokenizer_path.is_file():
+            raise FileNotFoundError(f"File not found: {tokenizer_path}")
+
+        sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
+        sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
+
+        # some models like Pile-T5 family use BPE tokenizer instead of Unigram
+        if sentencepiece_model.trainer_spec.model_type == 2:  # BPE
+            # assure the tokenizer model file name is correct
+            assert tokenizer_path.name == 'tokenizer.model'
+            return self._set_vocab_sentencepiece()
+        else:
+            assert sentencepiece_model.trainer_spec.model_type == 1  # UNIGRAM
+
+        add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
+        remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
+        precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
+
+        tokenizer = SentencePieceProcessor()
+        tokenizer.LoadFromFile(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+            piece = tokenizer.IdToPiece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.GetScore(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.IsUnknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.IsControl(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.IsUnused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.IsByte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        added_tokens_file = self.dir_model / 'added_tokens.json'
+        if added_tokens_file.is_file():
+            with open(added_tokens_file, "r", encoding="utf-8") as f:
+                added_tokens_json = json.load(f)
+                for key in added_tokens_json:
+                    token_id = added_tokens_json[key]
+                    if token_id >= vocab_size:
+                        logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                        continue
+
+                    tokens[token_id] = key.encode("utf-8")
+                    scores[token_id] = -1000.0
+                    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+        if vocab_size > len(tokens):
+            pad_count = vocab_size - len(tokens)
+            logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
+            for i in range(1, pad_count + 1):
+                tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
+                scores.append(-1000.0)
+                toktypes.append(SentencePieceTokenTypes.UNUSED)
+
+        self.gguf_writer.add_tokenizer_model("t5")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_add_space_prefix(add_prefix)
+        self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
+        if precompiled_charsmap:
+            self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+        self.gguf_writer.add_add_bos_token(False)
+        self.gguf_writer.add_add_eos_token(True)
+
+    def set_gguf_parameters(self):
+        if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
+            logger.warning("Couldn't find context length in config.json, assuming default value of 512")
+            n_ctx = 512
+        self.gguf_writer.add_context_length(n_ctx)
+        self.gguf_writer.add_embedding_length(self.hparams["d_model"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
+        self.gguf_writer.add_block_count(self.hparams["num_layers"])
+        self.gguf_writer.add_head_count(self.hparams["num_heads"])
+        self.gguf_writer.add_key_length(self.hparams["d_kv"])
+        self.gguf_writer.add_value_length(self.hparams["d_kv"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_relative_attn_buckets_count(self.hparams["relative_attention_num_buckets"])
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        # T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
+        # "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
+        # in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
+        # and decoder and ignore the remaining ones.
+        if name in ["decoder.embed_tokens.weight", "encoder.embed_tokens.weight", "shared.weight"]:
+            if not self.shared_token_embeddings_found:
+                name = "shared.weight"
+                self.shared_token_embeddings_found = True
+            else:
+                logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 @Model.register("JAISLMHeadModel")
 class JaisModel(Model):
     model_arch = gguf.MODEL_ARCH.JAIS