@@ -53,8 +53,6 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };
 
-using llama_tokens = std::vector<llama_token>;
-
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const * LLAMA_COMMIT;
@@ -239,7 +237,7 @@ struct gpt_params {
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
-    bool ctx_shift = true;
+
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@@ -373,9 +371,6 @@ struct gpt_params {
     bool sweep_bench_output_jsonl = false;
 };
 
-
-
-void gpt_params_handle_hf_token(gpt_params & params);
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
@@ -386,15 +381,6 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
 
 std::string gpt_params_get_system_info(const gpt_params & params);
 
-
-struct common_remote_params {
-    std::vector<std::string> headers;
-    long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
-    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
-};
-// get remote file content, returns <http_code, raw_response_body>
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
-
 //
 // String utils
 //
@@ -511,12 +497,6 @@ std::vector<llama_token> llama_tokenize(
     bool add_special,
     bool parse_special = false);
 
-std::vector<llama_token> llama_tokenize(
-    const struct llama_vocab * vocab,
-    const std::string & text,
-    bool add_special,
-    bool parse_special = false);
-
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
@@ -533,16 +513,70 @@ std::string llama_token_to_piece(
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
 std::string llama_detokenize(
-    const llama_context * ctx,
+    llama_context * ctx,
     const std::vector<llama_token> & tokens,
     bool special = true);
 
-
 // Uses the value from the model metadata if possible, otherwise
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);
 
-
+//
+// Chat template utils
+//
+// struct common_tool_call {
+//     std::string name;
+//     std::string arguments;
+//     std::string id;
+// };
+//
+// // same with llama_chat_message, but uses std::string
+// struct common_chat_msg {
+//     std::string role;
+//     std::string content;
+//     std::vector<common_tool_call> tool_calls;
+//     std::string reasoning_content = "";
+// };
+
+// // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+// bool llama_chat_verify_template(const struct llama_model*, const std::string& tmpl, bool use_jinja);
+//
+// namespace minja {
+//     class chat_template;
+// }
+//
+// typedef minja::chat_template common_chat_template;
+//
+// struct common_chat_templates {
+//     bool has_explicit_template; // Model had builtin template or template override was specified.
+//     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+//     std::unique_ptr<common_chat_template> template_tool_use;
+// };
+//
+//
+// // CPP wrapper for llama_chat_apply_template
+// // If the built-in template is not supported, we default to chatml
+// // If the custom "tmpl" is not supported, we throw an error
+// std::string llama_chat_apply_template(
+//     const struct llama_model* model,
+//     const common_chat_template& tmpl,
+//     const std::vector<common_chat_msg>& chat,
+//     bool add_ass,
+//     bool use_jinja);
+//
+// // Format single message, while taking into account the position of that message in chat history
+// std::string llama_chat_format_single(const struct llama_model* model,
+//     const common_chat_template& tmpl,
+//     const std::vector<common_chat_msg>& past_msg,
+//     const common_chat_msg& new_msg,
+//     bool add_ass,
+//     bool use_jinja);
+//
+// // Returns an example of formatted chat
+// std::string llama_chat_format_example(const struct llama_model* model,
+//     const common_chat_template& tmpl, bool use_jinja);
+//
+// common_chat_templates llama_chat_templates_from_model(const struct llama_model* model, const std::string& chat_template_override);
 
 
 //