@@ -11,6 +11,10 @@
 #include <sstream>
 #include <stdexcept>
 
+#if defined(GGML_USE_CLBLAST)
+#include "ggml_v3b-opencl.h"
+#endif
+
 static const size_t kiB = 1024;
 static const size_t MiB = 1024*kiB;
 static const size_t GiB = 1024*MiB;
@@ -150,6 +154,9 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         throw std::runtime_error(format("failed to create ggml context"));
     }
 
+#if defined(GGML_USE_CLBLAST)
+    ggml_cl_init();
+#endif
     ggml_backend_buffer_ptr buf { ggml_backend_buft_alloc_buffer(buft, 0) };
     ggml_tensor * op_tensor = fn(ctx.get());
     for (int i = 0; i < GGML_MAX_SRC; i++) {
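
Context for the hunk above: ggml_cl_init() runs every time buft_supported() is evaluated. A minimal sketch of wrapping that setup in a once-guard, assuming only that ggml_cl_init() is the entry point declared in ggml_v3b-opencl.h; the ensure_cl_init() helper is hypothetical, not part of the commit:

#include <mutex>

#if defined(GGML_USE_CLBLAST)
#include "ggml_v3b-opencl.h"
#endif

// hypothetical helper: run the CLBlast/OpenCL setup at most once per process,
// so per-call sites such as buft_supported() can invoke it freely
static void ensure_cl_init() {
#if defined(GGML_USE_CLBLAST)
    static std::once_flag flag;
    std::call_once(flag, [] { ggml_cl_init(); });
#endif
}
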
@@ -1153,6 +1160,16 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
             const int n_merges = gguf_get_arr_n(ctx, merges_keyidx);
             for (int i = 0; i < n_merges; i++) {
                 const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i);
+                if (!OldBPETokenizerMode)
+                {
+                    auto validcodepoints = unicode_cpts_from_utf8(word).size() > 0;
+                    GGML_ASSERT_CONTINUE(validcodepoints);
+                    if (!validcodepoints)
+                    {
+                        OldBPETokenizerMode = true;
+                        printf("\nFalling Back to older tokenizer...");
+                    }
+                }
                 GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0);
 
                 std::string first;
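
The fallback above keys on whether a merge entry decodes as UTF-8 at all (unicode_cpts_from_utf8(word).size() > 0). A self-contained sketch of an equivalent check; is_valid_utf8() is a stand-in helper, and it deliberately skips the overlong/surrogate rejection a full validator would add:

#include <string>

// stand-in for unicode_cpts_from_utf8(word).size() > 0: true iff the string is
// non-empty and every byte sequence is a structurally valid UTF-8 codepoint
static bool is_valid_utf8(const std::string & s) {
    size_t i = 0;
    while (i < s.size()) {
        const unsigned char c = s[i];
        size_t len = 0;
        if      (c < 0x80)          len = 1;  // ASCII
        else if ((c >> 5) == 0x06)  len = 2;  // 110xxxxx
        else if ((c >> 4) == 0x0E)  len = 3;  // 1110xxxx
        else if ((c >> 3) == 0x1E)  len = 4;  // 11110xxx
        else return false;                    // stray continuation byte
        if (i + len > s.size()) return false; // truncated sequence
        for (size_t j = 1; j < len; j++) {
            if ((s[i + j] & 0xC0) != 0x80) return false; // bad continuation
        }
        i += len;
    }
    return !s.empty();
}
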
@@ -1398,10 +1415,13 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
 
     for (uint32_t i = 0; i < n_vocab; i++) {
         std::string word = gguf_get_arr_str(ctx, token_idx, i);
-        if (word.empty()) {
+        if (!OldBPETokenizerMode)
+        {
+        if (word.empty()) {
             LLAMA_LOG_WARN("%s: empty token at index %u\n", __func__, i);
             word = "[EMPTY_" + std::to_string(i) + "]";
         }
+        }
 
         vocab.token_to_id[word] = i;
         vocab.max_token_len = std::max(vocab.max_token_len, (int) word.size());
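
Why the [EMPTY_i] placeholder matters on the non-fallback path: token_to_id keys on the token string, so repeated empty strings would silently collapse to a single entry. A toy demonstration, with std::map standing in for the vocab table:

#include <cstdio>
#include <map>
#include <string>

int main() {
    std::map<std::string, int> token_to_id;
    token_to_id[""] = 7;  // first empty token
    token_to_id[""] = 9;  // second empty token overwrites the first
    std::printf("%zu entry(ies)\n", token_to_id.size());  // prints: 1 entry(ies)
}
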
@@ -1424,7 +1444,7 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
             }
         }
     }
-    GGML_ASSERT(vocab.id_to_token.size() == vocab.token_to_id.size());
+    GGML_ASSERT_CONTINUE(vocab.id_to_token.size() == vocab.token_to_id.size());
 
     vocab.init_tokenizer();
 
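
GGML_ASSERT_CONTINUE above reports the mismatch without aborting, which keeps loading alive when the sizes can legitimately differ (e.g. duplicate token strings collapsing in token_to_id under the old-tokenizer fallback). A sketch of a macro in that spirit; illustrative only, the real definition lives elsewhere in this tree:

#include <cstdio>

// illustrative non-fatal assert: log the failed condition and keep going,
// where GGML_ASSERT would abort the process
#define ASSERT_CONTINUE_SKETCH(x)                                        \
    do {                                                                 \
        if (!(x)) {                                                      \
            std::fprintf(stderr, "assert-continue failed: %s:%d: %s\n",  \
                         __FILE__, __LINE__, #x);                        \
        }                                                                \
    } while (0)
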
@@ -1681,8 +1701,8 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
         } else {
             // token is control, but not marked as EOG -> print a debug log
             if (vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && vocab.special_eog_ids.count(t.second) == 0) {
-                LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
-                        __func__, t.second, t.first.c_str());
+                //LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
+                //        __func__, t.second, t.first.c_str());
             }
         }
     }