@@ -707,28 +707,28 @@ struct llm_tokenizer_wpm_session {
 //
 
 struct llm_tokenizer_ugm : llm_tokenizer {
-    llm_tokenizer_ugm(const llama_vocab & vocab) {
-        if (vocab.precompiled_charsmap.size() > 0) {
+    llm_tokenizer_ugm(const llama_vocab & vocab, const std::vector<char> & precompiled_charsmap) {
+        if (precompiled_charsmap.size() > 0) {
             size_t charsmap_offset = 0;
 
             // First four bytes of precompiled_charsmap contains length of binary
             // blob containing XOR-compressed compact double array (XCDA) entries
-            uint32_t xcda_blob_size = *(const uint32_t *) &vocab.precompiled_charsmap[0];
+            uint32_t xcda_blob_size = *(const uint32_t *) &precompiled_charsmap[0];
             charsmap_offset += sizeof(xcda_blob_size);
-            if (xcda_blob_size + charsmap_offset >= vocab.precompiled_charsmap.size()) {
+            if (xcda_blob_size + charsmap_offset >= precompiled_charsmap.size()) {
                 throw std::runtime_error("Index out of array bounds in precompiled charsmap!");
             }
 
             // Next xcda_blob_size bytes contain entries of XOR-compressed compact
             // double array (XCDA). Each entry is bit-packed into a 32-bit integer.
-            xcda_array = (const uint32_t *) &vocab.precompiled_charsmap[charsmap_offset];
+            xcda_array = (const uint32_t *) &precompiled_charsmap[charsmap_offset];
             xcda_array_size = xcda_blob_size / sizeof(uint32_t);
             charsmap_offset += xcda_blob_size;
 
             // Remaining bytes of precompiled charsmap contain null-terminated
             // replacement strings for prefixes matched by the XCDA.
-            prefix_replacements = &vocab.precompiled_charsmap[charsmap_offset];
-            prefix_replacements_size = vocab.precompiled_charsmap.size() - charsmap_offset;
+            prefix_replacements = &precompiled_charsmap[charsmap_offset];
+            prefix_replacements_size = precompiled_charsmap.size() - charsmap_offset;
         }
 
         for (unsigned int id = 0; id < vocab.id_to_token.size(); ++id) {
@@ -1169,6 +1169,8 @@ struct llm_tokenizer_rwkv_session {
 struct llama_vocab::impl {
     std::unique_ptr<llm_tokenizer> tokenizer;
 
+    std::vector<char> precompiled_charsmap;
+
     impl(const llama_vocab & vocab) : vocab(vocab) {
     }
 
@@ -1195,7 +1197,7 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) {
             tokenizer = std::make_unique<llm_tokenizer_wpm>(vocab);
             break;
         case LLAMA_VOCAB_TYPE_UGM:
-            tokenizer = std::make_unique<llm_tokenizer_ugm>(vocab);
+            tokenizer = std::make_unique<llm_tokenizer_ugm>(vocab, precompiled_charsmap);
            break;
        case LLAMA_VOCAB_TYPE_RWKV:
            tokenizer = std::make_unique<llm_tokenizer_rwkv>(vocab);
@@ -1334,14 +1336,14 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
         if (precompiled_charsmap_keyidx != -1) {
             size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
             const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
-            precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
+            pimpl->precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
 #ifdef IS_BIG_ENDIAN
             // correct endiannes of data in precompiled_charsmap binary blob
-            uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
+            uint32_t * xcda_blob_size = (uint32_t *) &pimpl->precompiled_charsmap[0];
             *xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
             assert(*xcda_blob_size + sizeof(uint32_t) < n_precompiled_charsmap);
             size_t xcda_array_size = *xcda_blob_size / sizeof(uint32_t);
-            uint32_t * xcda_array = (uint32_t *) &precompiled_charsmap[sizeof(uint32_t)];
+            uint32_t * xcda_array = (uint32_t *) &pimpl->precompiled_charsmap[sizeof(uint32_t)];
             for (size_t i = 0; i < xcda_array_size; ++i) {
                 xcda_array[i] = __builtin_bswap32(xcda_array[i]);
             }
0 commit comments