@@ -1322,11 +1322,13 @@ struct llama_vocab::impl {
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const;
+        bool unparse_special,
+        bool remove_space_prefix = true) const;
 
     std::string detokenize(
         const std::vector<llama_token> & tokens,
-        bool special) const;
+        bool special,
+        bool remove_space_prefix = true) const;
 
     void print_info() const;
 
@@ -2581,7 +2583,8 @@ int32_t llama_vocab::impl::detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const {
+        bool unparse_special,
+        bool remove_space_prefix) const {
     if (type == LLAMA_VOCAB_TYPE_NONE) {
         return 0;
     }
@@ -2592,7 +2595,7 @@ int32_t llama_vocab::impl::detokenize(
     int32_t total = 0;
 
     // remove the leading space
-    bool remove_space = add_space_prefix;
+    bool remove_space = add_space_prefix && remove_space_prefix;
 
     if (remove_special && add_bos) {
         if (n_tokens > 0 && tokens[0] == special_bos_id) {
@@ -2991,17 +2994,18 @@ int32_t llama_vocab::detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) const {
-    return pimpl->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
+        bool unparse_special,
+        bool remove_space_prefix) const {
+    return pimpl->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
 }
 
-std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, bool special) const {
+std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, bool special, bool remove_space_prefix) const {
     std::string text;
     text.resize(std::max(text.capacity(), tokens.size()));
-    int32_t n_chars = detokenize(tokens.data(), (int32_t) tokens.size(), &text[0], (int32_t) text.size(), false, special);
+    int32_t n_chars = detokenize(tokens.data(), (int32_t) tokens.size(), &text[0], (int32_t) text.size(), false, special, remove_space_prefix);
     if (n_chars < 0) {
         text.resize(-n_chars);
-        n_chars = detokenize(tokens.data(), (int32_t) tokens.size(), &text[0], (int32_t) text.size(), false, special);
+        n_chars = detokenize(tokens.data(), (int32_t) tokens.size(), &text[0], (int32_t) text.size(), false, special, remove_space_prefix);
         GGML_ASSERT(n_chars <= (int32_t) text.size()); // whitespace trimming is performed after per-token detokenization
     }
 
@@ -3246,7 +3250,8 @@ int32_t llama_detokenize(
         char * text,
         int32_t text_len_max,
         bool remove_special,
-        bool unparse_special) {
-    return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special);
+        bool unparse_special,
+        bool remove_space_prefix) {
+    return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special, remove_space_prefix);
 }
 
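For context, here is a minimal sketch of how a caller might exercise the new parameter through the patched public C API shown in the last hunk. The helper name, buffer sizing, and flag choices are illustrative placeholders, not part of the commit; only the llama_detokenize() signature and the negative-return convention come from the code above.

#include <algorithm>
#include <string>
#include <vector>

#include "llama.h"

// Sketch only: detokenize while keeping the leading space that
// add_space_prefix vocabularies would otherwise strip on output.
// Assumes `vocab` and `tokens` were obtained elsewhere
// (e.g. llama_model_get_vocab() and llama_tokenize()).
static std::string detokenize_keep_space(
        const llama_vocab * vocab,
        const std::vector<llama_token> & tokens) {
    std::string text;
    text.resize(std::max<size_t>(64, tokens.size()));
    int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t) tokens.size(),
            &text[0], (int32_t) text.size(),
            /*remove_special      =*/ true,
            /*unparse_special     =*/ false,
            /*remove_space_prefix =*/ false);
    if (n_chars < 0) {
        // a negative result reports the required buffer size; retry once
        text.resize(-n_chars);
        n_chars = llama_detokenize(vocab, tokens.data(), (int32_t) tokens.size(),
                &text[0], (int32_t) text.size(), true, false, false);
    }
    text.resize(n_chars);
    return text;
}

Because the new parameter defaults to true everywhere, existing callers keep the old behavior; passing false is presumably useful when detokenizing chunks of a longer sequence that must concatenate back verbatim, where silently dropping the space prefix would corrupt the reassembled text.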