@@ -53,8 +53,6 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };
 
-using llama_tokens = std::vector<llama_token>;
-
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const * LLAMA_COMMIT;
@@ -240,7 +238,7 @@ struct gpt_params {
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
-    bool ctx_shift = true;
+
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@@ -375,9 +373,6 @@ struct gpt_params {
     bool sweep_bench_output_jsonl = false;
 };
 
-
-
-void gpt_params_handle_hf_token(gpt_params & params);
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
@@ -388,15 +383,6 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
 
 std::string gpt_params_get_system_info(const gpt_params & params);
 
-
-struct common_remote_params {
-    std::vector<std::string> headers;
-    long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
-    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
-};
-// get remote file content, returns <http_code, raw_response_body>
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
-
 //
 // String utils
 //
@@ -513,12 +499,6 @@ std::vector<llama_token> llama_tokenize(
     bool add_special,
     bool parse_special = false);
 
-std::vector<llama_token> llama_tokenize(
-    const struct llama_vocab * vocab,
-    const std::string & text,
-    bool add_special,
-    bool parse_special = false);
-
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
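The hunk above drops the vocab-based llama_tokenize overload, leaving the context-based overload declared earlier in this header. A minimal usage sketch, assuming ctx is a llama_context * initialized elsewhere; the helper name tokenize_prompt is illustrative, not part of the header:

    #include <string>
    #include <vector>
    #include "common.h" // declares llama_tokenize(ctx, text, add_special, parse_special)

    // Hypothetical helper: tokenize a user prompt via the surviving ctx-based overload.
    static std::vector<llama_token> tokenize_prompt(llama_context * ctx, const std::string & prompt) {
        // add_special = true   -> let the tokenizer add BOS/EOS as the model expects
        // parse_special = true -> special tokens written literally in the prompt are parsed
        return llama_tokenize(ctx, prompt, /*add_special=*/true, /*parse_special=*/true);
    }
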
@@ -535,16 +515,70 @@ std::string llama_token_to_piece(
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
 std::string llama_detokenize(
-    const llama_context * ctx,
+    llama_context * ctx,
     const std::vector<llama_token> & tokens,
     bool special = true);
 
-
 // Uses the value from the model metadata if possible, otherwise
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);
 
-
+//
+// Chat template utils
+//
+// struct common_tool_call {
+//     std::string name;
+//     std::string arguments;
+//     std::string id;
+// };
+//
+// // same with llama_chat_message, but uses std::string
+// struct common_chat_msg {
+//     std::string role;
+//     std::string content;
+//     std::vector<common_tool_call> tool_calls;
+//     std::string reasoning_content = "";
+// };
+
+// // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+// bool llama_chat_verify_template(const struct llama_model * model, const std::string & tmpl, bool use_jinja);
+//
+// namespace minja {
+//     class chat_template;
+// }
+//
+// typedef minja::chat_template common_chat_template;
+//
+// struct common_chat_templates {
+//     bool has_explicit_template; // Model had builtin template or template overridde was specified.
+//     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+//     std::unique_ptr<common_chat_template> template_tool_use;
+// };
+//
+//
+// // CPP wrapper for llama_chat_apply_template
+// // If the built-in template is not supported, we default to chatml
+// // If the custom "tmpl" is not supported, we throw an error
+// std::string llama_chat_apply_template(
+//     const struct llama_model * model,
+//     const common_chat_template & tmpl,
+//     const std::vector<common_chat_msg> & chat,
+//     bool add_ass,
+//     bool use_jinja);
+//
+// // Format single message, while taking into account the position of that message in chat history
+// std::string llama_chat_format_single(const struct llama_model * model,
+//     const common_chat_template & tmpl,
+//     const std::vector<common_chat_msg> & past_msg,
+//     const common_chat_msg & new_msg,
+//     bool add_ass,
+//     bool use_jinja);
+//
+// // Returns an example of formatted chat
+// std::string llama_chat_format_example(const struct llama_model * model,
+//     const common_chat_template & tmpl, bool use_jinja);
+//
+// common_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
 
 
 //
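Taken together, the declarations kept by the final hunk support a simple tokenize/detokenize round trip. A minimal sketch, assuming ctx and model come from the usual common init path; dump_round_trip is an illustrative name, not part of the header:

    #include <cstdio>
    #include <string>
    #include <vector>
    #include "common.h" // llama_tokenize, llama_detokenize, llama_should_add_bos_token

    // Hypothetical check: tokenize a string, detokenize it back, and print the result.
    static void dump_round_trip(llama_context * ctx, const llama_model * model, const std::string & text) {
        const bool add_bos = llama_should_add_bos_token(model); // model metadata, else SPM heuristic
        std::vector<llama_token> toks = llama_tokenize(ctx, text, add_bos);
        // note: llama_detokenize now takes a non-const llama_context *, per the hunk above
        std::string round_trip = llama_detokenize(ctx, toks, /*special=*/true);
        std::printf("%zu tokens -> \"%s\"\n", toks.size(), round_trip.c_str());
    }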