Skip to content

Commit e37f276

Browse files
committed
cleared CPU flag manually for templates, added truncation for embeddings
1 parent 8a4a9b8 commit e37f276

File tree

3 files changed

+29
-7
lines changed

3 files changed

+29
-7
lines changed

expose.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ struct embeddings_load_model_inputs
251251
struct embeddings_generation_inputs
252252
{
253253
const char * prompt = nullptr;
254+
const bool truncate = true;
254255
};
255256
struct embeddings_generation_outputs
256257
{

koboldcpp.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
dry_seq_break_max = 128
5050

5151
# global vars
52-
KcppVersion = "1.87"
52+
KcppVersion = "1.87.1"
5353
showdebug = True
5454
kcpp_instance = None #global running instance
5555
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}
@@ -332,7 +332,8 @@ class embeddings_load_model_inputs(ctypes.Structure):
332332
("debugmode", ctypes.c_int)]
333333

334334
class embeddings_generation_inputs(ctypes.Structure):
335-
_fields_ = [("prompt", ctypes.c_char_p)]
335+
_fields_ = [("prompt", ctypes.c_char_p),
336+
("truncate", ctypes.c_bool)]
336337

337338
class embeddings_generation_outputs(ctypes.Structure):
338339
_fields_ = [("status", ctypes.c_int),
@@ -1619,6 +1620,7 @@ def embeddings_generate(genparams):
16191620
try:
16201621
inputs = embeddings_generation_inputs()
16211622
inputs.prompt = prompt.encode("UTF-8")
1623+
inputs.truncate = genparams.get('truncate', True)
16221624
ret = handle.embeddings_generate(inputs)
16231625
if ret.status==1:
16241626
outstr = ret.data.decode("UTF-8","ignore")
@@ -5172,6 +5174,7 @@ def convert_args_to_template(savdict):
51725174
savdict["useclblast"] = None
51735175
savdict["usecublas"] = None
51745176
savdict["usevulkan"] = None
5177+
savdict["usecpu"] = None
51755178
savdict["tensor_split"] = None
51765179
savdict["draftgpusplit"] = None
51775180
savdict["config"] = None

otherarch/embeddings_adapter.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,30 @@ embeddings_generation_outputs embeddingstype_generate(const embeddings_generatio
201201
std::vector<std::vector<int32_t>> prompt_inputs;
202202
auto inp = common_tokenize(embeddings_ctx, prompt, true, true);
203203
if (inp.size() > n_batch) {
204-
printf("\n%s: number of tokens in an input (%lld) exceeds embedding size limit for this model (%lld), lower token amount!\n",
204+
if (inputs.truncate) {
205+
int oldsize = inp.size();
206+
//get bos token
207+
std::vector<int> bos;
208+
bos = common_tokenize(embeddings_ctx, "", true,true);
209+
int offset = inp.size() - n_batch + 1;
210+
inp = std::vector<int>(inp.begin() + offset, inp.end());
211+
//replace bos into front if exists
212+
if(bos.size()>0 && inp.size()>0)
213+
{
214+
inp[0] = bos[0];
215+
}
216+
if(embeddings_debug)
217+
{
218+
printf("\n%s: Input too long, truncated from %d to last %zu tokens.\n", __func__, oldsize, inp.size());
219+
}
220+
} else {
221+
printf("\n%s: number of tokens in an input (%lld) exceeds embedding size limit for this model (%lld), lower token amount!\n",
205222
__func__, (long long int) inp.size(), (long long int) n_batch);
206-
output.data = "";
207-
output.status = 0;
208-
output.count = 0;
209-
return output;
223+
output.data = "";
224+
output.status = 0;
225+
output.count = 0;
226+
return output;
227+
}
210228
}
211229
prompt_inputs.push_back(inp);
212230

0 commit comments

Comments (0)