@@ -53,8 +53,6 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };

-using llama_tokens = std::vector<llama_token>;
-
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const * LLAMA_COMMIT;
@@ -240,7 +238,7 @@ struct gpt_params {
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
-    bool ctx_shift = true;
+
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
@@ -374,9 +372,6 @@ struct gpt_params {
     bool sweep_bench_output_jsonl = false;
 };

-
-
-void gpt_params_handle_hf_token(gpt_params & params);
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);

@@ -387,15 +382,6 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);

 std::string gpt_params_get_system_info(const gpt_params & params);

-
-struct common_remote_params {
-    std::vector<std::string> headers;
-    long timeout = 0; // CURLOPT_TIMEOUT, in seconds; 0 means no timeout
-    long max_size = 0; // max size of the response; unlimited if 0; max is 2GB
-};
-// get remote file content, returns <http_code, raw_response_body>
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
-
 //
 // String utils
 //
@@ -512,12 +498,6 @@ std::vector<llama_token> llama_tokenize(
         bool add_special,
         bool parse_special = false);

-std::vector<llama_token> llama_tokenize(
-        const struct llama_vocab * vocab,
-        const std::string & text,
-        bool add_special,
-        bool parse_special = false);
-
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
@@ -534,16 +514,70 @@ std::string llama_token_to_piece(
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
 std::string llama_detokenize(
-        const llama_context * ctx,
+        llama_context * ctx,
         const std::vector<llama_token> & tokens,
         bool special = true);

-
 // Uses the value from the model metadata if possible, otherwise
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);

-
+//
+// Chat template utils
+//
+// struct common_tool_call {
+//     std::string name;
+//     std::string arguments;
+//     std::string id;
+// };
+//
+// // same with llama_chat_message, but uses std::string
+// struct common_chat_msg {
+//     std::string role;
+//     std::string content;
+//     std::vector<common_tool_call> tool_calls;
+//     std::string reasoning_content = "";
+// };
+
+// // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+// bool llama_chat_verify_template(const struct llama_model *, const std::string & tmpl, bool use_jinja);
+//
+// namespace minja {
+//     class chat_template;
+// }
+//
+// typedef minja::chat_template common_chat_template;
+//
+// struct common_chat_templates {
+//     bool has_explicit_template; // Model had builtin template or template override was specified.
+//     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+//     std::unique_ptr<common_chat_template> template_tool_use;
+// };
+//
+//
+// // CPP wrapper for llama_chat_apply_template
+// // If the built-in template is not supported, we default to chatml
+// // If the custom "tmpl" is not supported, we throw an error
+// std::string llama_chat_apply_template(
+//         const struct llama_model * model,
+//         const common_chat_template & tmpl,
+//         const std::vector<common_chat_msg> & chat,
+//         bool add_ass,
+//         bool use_jinja);
+//
+// // Format single message, while taking into account the position of that message in chat history
+// std::string llama_chat_format_single(const struct llama_model * model,
+//         const common_chat_template & tmpl,
+//         const std::vector<common_chat_msg> & past_msg,
+//         const common_chat_msg & new_msg,
+//         bool add_ass,
+//         bool use_jinja);
+//
+// // Returns an example of formatted chat
+// std::string llama_chat_format_example(const struct llama_model * model,
+//         const common_chat_template & tmpl, bool use_jinja);
+//
+// common_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);


 //
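
A minimal sketch of how the tokenization helpers kept by this diff are typically wired together. Only the llama_detokenize and llama_should_add_bos_token signatures are taken verbatim from the hunks above; the llama_tokenize overload used here (context + string, add_special, parse_special) is assumed, since its leading parameters are cut off by the hunk context, and the surrounding model/context setup is left to the caller.

#include "common.h"
#include "llama.h"

#include <cstdio>
#include <string>
#include <vector>

// Round-trip a prompt through the helpers declared in this header.
// NOTE: the llama_tokenize overload used here is an assumption; it is not
// shown in full in this diff.
static void tokenize_roundtrip(llama_context * ctx, const llama_model * model, const std::string & prompt) {
    // Let the model metadata decide whether a BOS token should be prepended.
    const bool add_bos = llama_should_add_bos_token(model);

    std::vector<llama_token> tokens = llama_tokenize(ctx, prompt, add_bos, /*parse_special=*/true);

    // Post-change signature: llama_detokenize now takes a non-const llama_context *.
    const std::string text = llama_detokenize(ctx, tokens, /*special=*/true);

    printf("%zu tokens -> \"%s\"\n", tokens.size(), text.c_str());
}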