Commit a9658c7

Merge pull request #727 from corebonts/incomplete-utf

Avoid streaming incomplete UTF-8 characters

2 parents fb7ce5a + 8ff1f50 · commit a9658c7

4 files changed: +77, -17 lines

llamafile/server/utf.cpp

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils.h"
#include <string>

namespace lf {
namespace server {

bool ends_with_incomplete_utf8(const std::string& str) {
    for (unsigned i = 1; i <= 4 && i <= str.size(); ++i) {
        unsigned char c = str[str.size() - i];
        if ((c & 0xC0) == 0x80) {
            // continuation byte: 10xxxxxx
            continue;
        }
        if ((c & 0xE0) == 0xC0) {
            // 2-byte character: 110xxxxx ...
            return i < 2;
        } else if ((c & 0xF0) == 0xE0) {
            // 3-byte character: 1110xxxx ...
            return i < 3;
        } else if ((c & 0xF8) == 0xF0) {
            // 4-byte character: 11110xxx ...
            return i < 4;
        }
        // else 1-byte character or invalid byte
        break; // Found a valid starting byte, no need to check further.
    }

    return false; // Did not find an incomplete character
}

} // namespace server
} // namespace lf
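
The helper scans at most the last four bytes of the buffer: it skips trailing continuation bytes (10xxxxxx), then checks whether the first lead byte it finds promises more bytes than are actually present. Below is a minimal usage sketch, assuming it is compiled and linked together with utf.cpp from this commit; the byte strings are illustrative inputs, not part of the patch.

#include <cassert>
#include <string>

// Declaration of the helper added in utf.cpp (normally pulled in via utils.h);
// link this sketch against utf.cpp.
namespace lf {
namespace server {
bool ends_with_incomplete_utf8(const std::string& str);
} // namespace server
} // namespace lf

int main() {
    using lf::server::ends_with_incomplete_utf8;
    assert(!ends_with_incomplete_utf8("abc"));           // plain ASCII: nothing pending
    assert(!ends_with_incomplete_utf8("caf\xC3\xA9"));   // "café": 2-byte sequence complete
    assert(ends_with_incomplete_utf8("caf\xC3"));        // lead byte 0xC3 missing its continuation
    assert(ends_with_incomplete_utf8("\xE2\x82"));       // first 2 bytes of the 3-byte "€"
    assert(!ends_with_incomplete_utf8("\xE2\x82\xAC"));  // full 3-byte "€"
    return 0;
}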

llamafile/server/utils.h

Lines changed: 3 additions & 0 deletions
@@ -50,5 +50,8 @@ remove_old_image_atoms(const std::vector<Atom>&);
 int
 count_tokens(const std::vector<Atom>&);
 
+bool
+ends_with_incomplete_utf8(const std::string& str);
+
 } // namespace server
 } // namespace lf

llamafile/server/v1_chat_completions.cpp

Lines changed: 13 additions & 9 deletions
@@ -78,7 +78,7 @@ struct V1ChatCompletionState
 {
     std::string prompt;
     std::vector<Atom> atoms;
-    std::string piece;
+    std::string piece = "";
 };
 
 struct V1ChatCompletionResponse
@@ -658,19 +658,23 @@ Client::v1_chat_completions()
             finish_reason = "stop";
             break;
         }
-        state->piece =
+        state->piece +=
             llamafile_token_to_piece(slot_->ctx_, id, DONT_RENDER_SPECIAL_TOKENS);
         if (!state->piece.empty()) {
             if (params->stream) {
-                char* p = append_http_response_message(obuf_.p, 200);
-                choice["delta"]["content"] = state->piece;
-                response->json["created"] = timespec_real().tv_sec;
-                response->content = make_event(response->json);
-                choice.getObject().erase("delta");
-                if (!send_response_chunk(response->content))
-                    return false;
+                if (!ends_with_incomplete_utf8(state->piece)) {
+                    char* p = append_http_response_message(obuf_.p, 200);
+                    choice["delta"]["content"] = state->piece;
+                    response->json["created"] = timespec_real().tv_sec;
+                    response->content = make_event(response->json);
+                    choice.getObject().erase("delta");
+                    if (!send_response_chunk(response->content))
+                        return false;
+                    state->piece.clear();
+                }
             } else {
                 response->content += state->piece;
+                state->piece.clear();
             }
         }
     }
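
The streaming change is the same in both endpoints: token pieces are accumulated into state->piece instead of overwriting it, and a chunk is only sent (and the buffer cleared) once the buffer no longer ends in a partial multi-byte character. The sketch below isolates that accumulate-then-flush loop; printing to stdout stands in for send_response_chunk(), the split of "é" across pieces is a hypothetical tokenizer output, and the helper is assumed to be linked in from utf.cpp.

#include <iostream>
#include <string>

namespace lf {
namespace server {
bool ends_with_incomplete_utf8(const std::string& str); // from utf.cpp in this commit
} // namespace server
} // namespace lf

int main() {
    // "café!" with "é" (0xC3 0xA9) split across two pieces, as a tokenizer may do.
    const std::string pieces[] = { "caf", "\xC3", "\xA9", "!" };
    std::string buffer; // plays the role of state->piece
    for (const std::string& piece : pieces) {
        buffer += piece; // accumulate; the patch changes '=' to '+='
        if (buffer.empty())
            continue;
        if (!lf::server::ends_with_incomplete_utf8(buffer)) {
            std::cout << "send chunk: " << buffer << "\n"; // stand-in for send_response_chunk()
            buffer.clear(); // flushed a whole number of characters
        }
        // otherwise keep buffering until the multi-byte character completes
    }
    return 0;
}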

llamafile/server/v1_completions.cpp

Lines changed: 12 additions & 8 deletions
@@ -76,7 +76,7 @@ struct V1CompletionParams
 struct V1CompletionState
 {
     std::vector<Atom> atoms;
-    std::string piece;
+    std::string piece = "";
 };
 
 struct V1CompletionResponse
@@ -495,18 +495,22 @@ Client::v1_completions()
             finish_reason = "stop";
             break;
         }
-        state->piece =
+        state->piece +=
             llamafile_token_to_piece(slot_->ctx_, id, DONT_RENDER_SPECIAL_TOKENS);
         if (!state->piece.empty()) {
             if (params->stream) {
-                char* p = append_http_response_message(obuf_.p, 200);
-                choice["text"] = state->piece;
-                response->json["created"] = timespec_real().tv_sec;
-                response->content = make_event(response->json);
-                if (!send_response_chunk(response->content))
-                    return false;
+                if (!ends_with_incomplete_utf8(state->piece)) {
+                    char* p = append_http_response_message(obuf_.p, 200);
+                    choice["text"] = state->piece;
+                    response->json["created"] = timespec_real().tv_sec;
+                    response->content = make_event(response->json);
+                    if (!send_response_chunk(response->content))
+                        return false;
+                    state->piece.clear();
+                }
             } else {
                 response->content += state->piece;
+                state->piece.clear();
             }
         }
     }
