66 * LICENSE file in the root directory of this source tree.
77 */
88
9- #include < cstdint>
109#include < chrono>
10+ #include < cstdint>
1111#include < memory>
1212#include < string>
1313#include < unordered_map>
@@ -33,29 +33,40 @@ using ::executorch::runtime::Error;
3333
namespace {
/**
 * Checks whether the byte range `str[0, length)` consists solely of complete,
 * structurally well-formed UTF-8 sequences.
 *
 * The check is structural only: each lead byte must announce a 1- to 4-byte
 * sequence, the announced number of bytes must be present, and every trailing
 * byte must be a continuation byte (bit pattern 10xxxxxx). Overlong encodings
 * and surrogate code points are not rejected.
 *
 * @param str Pointer to the bytes to inspect; may be null only when `length`
 *     is 0.
 * @param length Number of bytes to inspect.
 * @return true if every byte belongs to a complete sequence (an empty range
 *     is valid); false on a stray continuation byte, an invalid lead byte, or
 *     a sequence that is truncated at the end of the range.
 */
bool utf8_check_validity(const char* str, size_t length) {
  size_t pos = 0;
  while (pos < length) {
    const uint8_t lead = static_cast<uint8_t>(str[pos]);

    // Derive the sequence length from the lead byte's high bits.
    size_t seq_len = 0;
    if (lead < 0x80) {
      seq_len = 1; // ASCII
    } else if ((lead & 0xE0) == 0xC0) {
      seq_len = 2;
    } else if ((lead & 0xF0) == 0xE0) {
      seq_len = 3;
    } else if ((lead & 0xF8) == 0xF0) {
      seq_len = 4;
    } else {
      return false; // Stray continuation byte or invalid lead byte
    }

    // Incomplete sequence at the end of the buffer.
    if (seq_len > length - pos) {
      return false;
    }

    for (size_t k = 1; k < seq_len; ++k) {
      if ((static_cast<uint8_t>(str[pos + k]) & 0xC0) != 0x80) {
        return false; // Expected a continuation byte
      }
    }

    // Advance by exactly one sequence. The previous implementation added the
    // full sequence length on top of the loop's own increment, which skipped
    // the byte following every multi-byte character.
    pos += seq_len;
  }
  return true; // All bytes were valid
}

// File-local buffer of text that has not yet been confirmed to be valid UTF-8.
// NOTE(review): appears to accumulate partial results across callback
// invocations until the buffer validates; confirm where it is cleared.
std::string token_buffer;
} // namespace
5970
6071namespace executorch_jni {
6172
@@ -69,10 +80,11 @@ class ExecuTorchLlamaCallbackJni
6980 static auto cls = ExecuTorchLlamaCallbackJni::javaClassStatic ();
7081 static const auto method =
7182 cls->getMethod <void (facebook::jni::local_ref<jstring>)>(" onResult" );
72-
83+
7384 token_buffer += result;
7485 if (!utf8_check_validity (token_buffer.c_str (), token_buffer.size ())) {
75- ET_LOG (Info, " Current token buffer is not valid UTF-8. Waiting for more." );
86+ ET_LOG (
87+ Info, " Current token buffer is not valid UTF-8. Waiting for more." );
7688 return ;
7789 }
7890 result = token_buffer;
0 commit comments