
Commit d8861ed

Merge branch 'ggml-org:master' into mradermacher

2 parents f4affdc + 8308f98

40 files changed: +1089 -1048 lines

ci/run.sh

Lines changed: 1 addition & 1 deletion
@@ -779,7 +779,7 @@ function gg_run_rerank_tiny {
     model_f16="${path_models}/ggml-model-f16.gguf"
 
     # for this model, the SEP token is "</s>"
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029
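The test prompt now uses the tab separator (the default of the new cls_sep parameter added below) instead of hard-coding the model's "</s>" SEP token; the expected separator tokens are inserted by the pair-splitting logic added in examples/embedding/embedding.cpp further down.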

common/arg.cpp

Lines changed: 7 additions & 0 deletions
@@ -2706,6 +2706,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.embd_sep = value;
         }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
+    add_opt(common_arg(
+        {"--cls-separator"}, "STRING",
+        "separator of classification sequences (default \\t) for example \"<#seq#>\"",
+        [](common_params & params, const std::string & value) {
+            params.cls_sep = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--host"}, "HOST",
         string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()),
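A hypothetical invocation of the new flag (model path and prompt are invented here; the <#seq#> string is the example separator from the help text above):

    ./bin/llama-embedding -m rerank-model.gguf --pooling rank --embd-normalize -1 \
        --cls-separator "<#seq#>" -p "what is panda?<#seq#>it's a bear"

With the default left unchanged, the same pair is written as "what is panda?\tit's a bear", as in the updated ci/run.sh test above.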

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -358,6 +358,7 @@ struct common_params {
     int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
     std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
     std::string embd_sep = "\n"; // separator of embeddings
+    std::string cls_sep = "\t"; // separator of classification sequences
 
     // server params
     int32_t port = 8080; // server listens on this network port

convert_hf_to_gguf.py

Lines changed: 0 additions & 14 deletions
@@ -2145,7 +2145,6 @@ def __init__(self, *args, **kwargs):
 
     def set_vocab(self):
         self._set_vocab_gpt2()
-        self.gguf_writer.add_add_bos_token(True)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -3918,9 +3917,6 @@ def _xlmroberta_set_vocab(self) -> None:
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
-
 
 @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
 class DistilBertModel(BertModel):
@@ -3962,8 +3958,6 @@ def set_vocab(self):
         bpe_tok_path = self.dir_model / "tokenizer.json"
         if bpe_tok_path.exists():
             self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
 
         # we need this to validate the size of the token_type embeddings
         # though currently we are passing all zeros to the token_type embeddings
@@ -4848,8 +4842,6 @@ def set_vocab(self):
             self.gguf_writer.add_token_type_count(2)
         else:
             raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
 
 
 @ModelBase.register("OpenELMForCausalLM")
@@ -5451,9 +5443,6 @@ def set_vocab(self):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
@@ -5591,9 +5580,6 @@ def set_vocab(self):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
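All six removals drop hard-coded add_add_bos_token()/add_add_eos_token() overrides. Judging by the surrounding context, where several of the deleted calls sit directly after special_vocab.add_to_gguf(self.gguf_writer), these flags are presumably now written from the tokenizer's own metadata rather than being forced by the converter.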

examples/embedding/embedding.cpp

Lines changed: 30 additions & 4 deletions
@@ -133,10 +133,36 @@ int main(int argc, char ** argv) {
     // max batch size
     const uint64_t n_batch = params.n_batch;
 
+    // get added sep and eos token, if any
+    const std::string added_sep_token = llama_vocab_get_add_sep(vocab) ? llama_vocab_get_text(vocab, llama_vocab_sep(vocab)) : "";
+    const std::string added_eos_token = llama_vocab_get_add_eos(vocab) ? llama_vocab_get_text(vocab, llama_vocab_eos(vocab)) : "";
+
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = common_tokenize(ctx, prompt, true, true);
+        std::vector<llama_token> inp;
+
+        // split classification pairs and insert expected separator tokens
+        if (pooling_type == LLAMA_POOLING_TYPE_RANK && prompt.find(params.cls_sep) != std::string::npos) {
+            std::vector<std::string> pairs = split_lines(prompt, params.cls_sep);
+            std::string final_prompt;
+
+            for (size_t i = 0; i < pairs.size(); i++) {
+                final_prompt += pairs[i];
+                if (i != pairs.size() - 1) {
+                    if (!added_eos_token.empty()) {
+                        final_prompt += added_eos_token;
+                    }
+                    if (!added_sep_token.empty()) {
+                        final_prompt += added_sep_token;
+                    }
+                }
+            }
+
+            inp = common_tokenize(ctx, final_prompt, true, true);
+        } else {
+            inp = common_tokenize(ctx, prompt, true, true);
+        }
         if (inp.size() > n_batch) {
             LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);
@@ -145,11 +171,11 @@
         inputs.push_back(inp);
     }
 
-    // check if the last token is SEP
+    // check if the last token is SEP/EOS
     // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
     for (auto & inp : inputs) {
-        if (inp.empty() || inp.back() != llama_vocab_sep(vocab)) {
-            LOG_WRN("%s: last token in the prompt is not SEP\n", __func__);
+        if (inp.empty() || (inp.back() != llama_vocab_sep(vocab) && inp.back() != llama_vocab_eos(vocab))) {
+            LOG_WRN("%s: last token in the prompt is not SEP or EOS\n", __func__);
             LOG_WRN("%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
         }
     }
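The effect of the new branch is easiest to see in isolation. A minimal standalone sketch (the helper name join_rank_pair is hypothetical, and the "</s>" strings stand in for whatever the model's added EOS/SEP tokens actually are):

    #include <cstdio>
    #include <string>
    #include <vector>

    // join classification pairs with EOS + SEP, mirroring the loop above
    static std::string join_rank_pair(const std::vector<std::string> & pairs,
                                      const std::string & eos, const std::string & sep) {
        std::string out;
        for (size_t i = 0; i < pairs.size(); i++) {
            out += pairs[i];
            if (i != pairs.size() - 1) {
                out += eos + sep; // inserted between query and document
            }
        }
        return out;
    }

    int main() {
        std::string joined = join_rank_pair({"what is panda?", "it's a bear"}, "</s>", "</s>");
        std::printf("%s\n", joined.c_str()); // what is panda?</s></s>it's a bear
    }

This reconstructs exactly the "</s></s>"-joined form that ci/run.sh used to hard-code.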

ggml/src/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
         foreach (feat ${ARGN})
             set(GGML_INTERNAL_${feat} ON)
         endforeach()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        foreach (feat ${ARGN})
+            set(GGML_INTERNAL_${feat} ON)
+        endforeach()
     endif()
 
     ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
         else()
             message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
         endif()
+    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
+        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+            ggml_add_cpu_backend_variant(power0)
+            ggml_add_cpu_backend_variant(power7_1 POWER7)
+            ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
+            ggml_add_cpu_backend_variant(power8_1 POWER8)
+            ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
+            ggml_add_cpu_backend_variant(power9 POWER9 VSX)
+            ggml_add_cpu_backend_variant(power10 POWER10 VSX)
+            ggml_add_cpu_backend_variant(power11 POWER11 VSX)
+        else()
+            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
+        endif()
     else()
         message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
     endif()
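This block only runs when GGML_CPU_ALL_VARIANTS is enabled. A hypothetical Linux/PowerPC configure (the option is normally paired with GGML_BACKEND_DL so the variants are built as loadable backends) would be:

    cmake -B build -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON

producing all eight variants, from the power0 baseline through power11 with VSX.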

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             else()
                 list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
             endif()
+        elseif(GGML_CPU_ALL_VARIANTS)
+            # Begin with the lowest baseline
+            set(ARCH_DEFINITIONS "")
+
+            # When a feature is selected, bump the MCPU to the first
+            # version that supported it
+            foreach(PVER RANGE 7 11)
+                if(DEFINED GGML_INTERNAL_POWER${PVER})
+                    set(POWERPC_MCPU "power${PVER}")
+                    list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
+                endif()
+            endforeach()
+            if (GGML_INTERNAL_VSX)
+                list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
+                list(APPEND ARCH_FLAGS -mvsx)
+            endif()
+
+            if (DEFINED POWERPC_MCPU)
+                list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
+            endif()
+            ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
         else()
             if (GGML_CPU_POWERPC_CPUTYPE)
                 list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
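A worked example: for the power8_2 variant declared above (features POWER8 and VSX), only GGML_INTERNAL_POWER8 is defined during the 7..11 loop, so POWERPC_MCPU becomes "power8"; the VSX branch then appends -mvsx. The variant is therefore compiled with -mcpu=power8 -mvsx and with GGML_USE_POWER8 and GGML_USE_VSX defined for the feature-scoring code below.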
ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp

Lines changed: 82 additions & 0 deletions

@@ -0,0 +1,82 @@
+#include "ggml-backend-impl.h"
+
+#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
+
+#if defined(__linux__)
+#include <sys/auxv.h>
+#endif
+
+#include <string>
+
+struct powerpc_features {
+    std::string platform = "";
+    int power_version = -1;
+
+    bool has_vsx = false;
+
+    powerpc_features() {
+#if defined(__linux__)
+        unsigned long auxval = getauxval(AT_PLATFORM);
+        if (auxval) {
+            platform = std::string(reinterpret_cast<const char*>(auxval));
+            // TBD: Do systems exist that return this in uppercase?
+            if (platform.substr(0, 5) == "power") {
+                // Extract a numeric suffix, if one exists
+                int vpos = -1;
+                for (int i = platform.length() - 1; i >= 0; i--) {
+                    if (std::isdigit(platform[i])) {
+                        vpos = i;
+                    } else {
+                        break;
+                    }
+                }
+                if (vpos > -1) {
+                    power_version = std::stoi(platform.substr(vpos));
+                }
+            }
+        }
+#endif
+        if (power_version >= 9) {
+            has_vsx = true;
+        }
+    }
+};
+
+static int ggml_backend_cpu_powerpc_score() {
+    int score = 1;
+    powerpc_features pf;
+
+    // Platform scores
+#if defined(GGML_USE_POWER7)
+    if (pf.power_version < 7) { return 0; }
+    score += 1<<1;
+#endif
+#if defined(GGML_USE_POWER8)
+    if (pf.power_version < 8) { return 0; }
+    score += 1<<2;
+#endif
+#if defined(GGML_USE_POWER9)
+    if (pf.power_version < 9) { return 0; }
+    score += 1<<3;
+#endif
+#if defined(GGML_USE_POWER10)
+    if (pf.power_version < 10) { return 0; }
+    score += 1<<4;
+#endif
+#if defined(GGML_USE_POWER11)
+    if (pf.power_version < 11) { return 0; }
+    score += 1<<5;
+#endif
+
+    // Feature scores
+#if defined(GGML_USE_VSX)
+    if (!pf.has_vsx) { return 0; }
+    score += 1<<6;
+#endif
+
+    return score;
+}
+
+GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
+
+#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
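How the scores play out: each variant is compiled with the GGML_USE_* macros chosen by the CMake logic above, and GGML_BACKEND_DL_SCORE_IMPL exports the score so the loader can pick the variant with the highest nonzero value. On a hypothetical POWER10 host with VSX (power_version == 10, has_vsx == true), the power10 variant scores 1 + (1<<4) + (1<<6) = 81, power9 scores 1 + (1<<3) + (1<<6) = 73, and power11 returns 0 because its version check fails, so power10 wins. The bit-shifted weights ensure a more specific variant always outranks a less specific one that also passes its checks.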

ggml/src/ggml-cpu/repack.cpp

Lines changed: 15 additions & 14 deletions
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
     }
 };
 
-// instance for Q4
-static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
-static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
-static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
-static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
-
-// instance for IQ4
-static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
-
 } // namespace ggml::cpu::repack
 
 static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
+
+    // instance for Q4
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
+    static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
+
+    // instance for IQ4
+    static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
+
     if (cur->type == GGML_TYPE_Q4_0) {
         if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
             if (cur->ne[1] % 8 == 0) {
-                return &ggml::cpu::repack::q4_0_8x8_q8_0;
+                return &q4_0_8x8_q8_0;
             }
         }
         if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::q4_0_4x8_q8_0;
+                return &q4_0_4x8_q8_0;
             }
         }
         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::q4_0_4x4_q8_0;
+                return &q4_0_4x4_q8_0;
             }
         }
     } else if (cur->type == GGML_TYPE_Q4_K) {
         if (ggml_cpu_has_avx2()) {
             if (cur->ne[1] % 8 == 0) {
-                return &ggml::cpu::repack::q4_K_8x8_q8_K;
+                return &q4_K_8x8_q8_K;
             }
         }
     } else if (cur->type == GGML_TYPE_IQ4_NL) {
         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
             if (cur->ne[1] % 4 == 0) {
-                return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
+                return &iq4_nl_4x4_q8_0;
             }
         }
     }
ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 16 deletions
@@ -19,10 +19,10 @@
 #endif
 #include "ggml-common.h"
 
-#include <cstdio>
 #include <array>
 #include <cassert>
 #include <cfloat>
+#include <cstdio>
 #include <string>
 #include <vector>
 
@@ -767,21 +767,7 @@ struct ggml_backend_cuda_context {
         name(GGML_CUDA_NAME + std::to_string(device)) {
     }
 
-    ~ggml_backend_cuda_context() {
-        if (copy_event != nullptr) {
-            CUDA_CHECK(cudaEventDestroy(copy_event));
-        }
-        for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) {
-            for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) {
-                if (streams[i][j] != nullptr) {
-                    CUDA_CHECK(cudaStreamDestroy(streams[i][j]));
-                }
-            }
-            if (cublas_handles[i] != nullptr) {
-                CUBLAS_CHECK(cublasDestroy(cublas_handles[i]));
-            }
-        }
-    }
+    ~ggml_backend_cuda_context();
 
     cudaStream_t stream(int device, int stream) {
         if (streams[device][stream] == nullptr) {
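The destructor is now only declared in the header; its body (the cudaEventDestroy/cudaStreamDestroy/cublasDestroy teardown deleted above) presumably moves to a single translation unit elsewhere in this commit, so the CUDA/cuBLAS cleanup calls no longer need to be compiled into every file that includes common.cuh.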
