Skip to content

Commit 4abea4b

Browse files
committed
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	README.md
#	docs/build.md
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-cpu/kleidiai/kernels.cpp
#	ggml/src/ggml-cpu/kleidiai/kernels.h
#	ggml/src/ggml-cpu/kleidiai/kleidiai.cpp
#	tests/test-backend-ops.cpp
#	tools/server/README.md
2 parents f5aa7c2 + 9220426 commit 4abea4b

File tree

4 files changed

+15
-13
lines changed

4 files changed

+15
-13
lines changed

ggml/src/ggml-cuda/im2col.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ static __global__ void im2col_kernel(
1010
return;
1111
}
1212

13-
const int64_t ksize = OW * (KH > 1 ? KW : 1);
13+
const int64_t ksize = OW * KH;
1414
const int64_t kx = i / ksize;
1515
const int64_t kd = kx * ksize;
1616
const int64_t ky = (i - kd) / OW;

ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,12 +40,10 @@ void main() {
4040
const uint src_base = ic * p.offset_delta + batch * p.batch_offset;
4141
const uint dst_base = ((batch * p.OH + oh) * p.OW) * p.CHW + ic * (p.KW * p.KH);
4242
const int oh_s1 = int(oh) * p.s1;
43-
const uint ksize = p.OW * (p.KH > 1 ? p.KW : 1);
43+
const uint ksize = p.OW * p.KH;
4444

4545
const uint base_linear_idx = gidx * NUM_ITER;
4646

47-
const uint max_ky = ksize / p.OW;
48-
4947
uint current_kx = base_linear_idx / ksize;
5048
const uint rem = base_linear_idx - (current_kx * ksize);
5149
uint current_ky = rem / p.OW;
@@ -76,7 +74,7 @@ void main() {
7674

7775
if (++current_ix == p.OW) {
7876
current_ix = 0;
79-
if (++current_ky == max_ky) {
77+
if (++current_ky == p.KH) {
8078
current_ky = 0;
8179
current_kx++;
8280
}

tools/main/main.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -786,14 +786,17 @@ int main(int argc, char ** argv) {
786786
}
787787

788788
// check for reverse prompt using special tokens
789-
llama_token last_token = common_sampler_last(smpl);
790-
for (auto token : antiprompt_token) {
791-
if (token == last_token) {
792-
if (params.interactive) {
793-
is_interacting = true;
789+
// avoid calling common_sampler_last() if last_output is empty
790+
if (!last_output.empty()) {
791+
llama_token last_token = common_sampler_last(smpl);
792+
for (auto token : antiprompt_token) {
793+
if (token == last_token) {
794+
if (params.interactive) {
795+
is_interacting = true;
796+
}
797+
is_antiprompt = true;
798+
break;
794799
}
795-
is_antiprompt = true;
796-
break;
797800
}
798801
}
799802

tools/server/server.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4516,9 +4516,10 @@ int main(int argc, char ** argv) {
45164516
json tokens_response = json::array();
45174517
if (body.count("content") != 0) {
45184518
const bool add_special = json_value(body, "add_special", false);
4519+
const bool parse_special = json_value(body, "parse_special", true);
45194520
const bool with_pieces = json_value(body, "with_pieces", false);
45204521

4521-
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, true);
4522+
llama_tokens tokens = tokenize_mixed(ctx_server.vocab, body.at("content"), add_special, parse_special);
45224523

45234524
if (with_pieces) {
45244525
for (const auto& token : tokens) {

0 commit comments

Comments
 (0)