@@ -53,6 +53,8 @@ struct llama_lora_adapter_container : llama_lora_adapter_info {
     struct llama_lora_adapter * adapter;
 };
 
+using llama_tokens = std::vector<llama_token>;
+
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const * LLAMA_COMMIT;
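
The new llama_tokens alias is just shorthand for std::vector<llama_token>. A minimal sketch of how a caller might use it with the existing tokenizer helper; the wrapper function name and the initialized llama_context are illustrative, not part of the diff:

    // hypothetical caller; assumes this header and an initialized context are available
    static void example_tokenize(llama_context * ctx) {
        llama_tokens tokens = llama_tokenize(ctx, "Hello world", /*add_special=*/true);
        (void) tokens; // use the tokens as usual; the alias changes nothing else
    }
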
@@ -237,7 +239,7 @@ struct gpt_params {
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
-
+    bool ctx_shift = true;
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles
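
The new ctx_shift flag defaults to true, so existing behavior is unchanged; callers that want to opt out of context shifting would clear it explicitly. A minimal sketch, with the helper name below purely illustrative:

    // hypothetical setup code, assuming the rest of params is filled by the usual argument parsing
    static gpt_params make_params_without_context_shift() {
        gpt_params params;        // default: ctx_shift == true (current behavior)
        params.ctx_shift = false; // opt out of context shifting for this run
        return params;
    }
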
@@ -371,6 +373,9 @@ struct gpt_params {
     bool sweep_bench_output_jsonl = false;
 };
 
+
+
+void gpt_params_handle_hf_token(gpt_params & params);
 void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
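
A hedged sketch of how the three gpt_params helpers might be called together after argument parsing; the wrapper name and call order are assumptions (the real sequence is not shown in this diff), and the comments describe the presumed intent only:

    static void finalize_params(gpt_params & params) {
        gpt_params_handle_hf_token(params);      // presumably resolves the Hugging Face token (params and/or environment)
        gpt_params_parse_from_env(params);       // pick up environment-variable overrides
        gpt_params_handle_model_default(params); // fill in a default model path if none was given
    }
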
@@ -381,6 +386,15 @@ void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
 
 std::string gpt_params_get_system_info(const gpt_params & params);
 
+
+struct common_remote_params {
+    std::vector<std::string> headers;
+    long timeout = 0;  // CURLOPT_TIMEOUT, in seconds; 0 means no timeout
+    long max_size = 0; // max size of the response; unlimited if 0; max is 2GB
+};
+// get remote file content, returns <http_code, raw_response_body>
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
+
 //
 // String utils
 //
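
A minimal usage sketch for common_remote_get_content; the function name, header string, timeout, and size cap below are illustrative values, not taken from the diff:

    // hypothetical helper: fetch a URL and return the body as text on HTTP 200, empty otherwise
    static std::string fetch_remote_text(const std::string & url) {
        common_remote_params remote_params;
        remote_params.headers.push_back("User-Agent: example-client"); // illustrative header
        remote_params.timeout  = 30;          // give up after 30 seconds
        remote_params.max_size = 1024 * 1024; // cap the response body at 1 MiB

        std::pair<long, std::vector<char>> res = common_remote_get_content(url, remote_params);
        if (res.first != 200) {
            return std::string(); // non-OK HTTP status
        }
        return std::string(res.second.begin(), res.second.end()); // raw bytes -> string
    }
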
@@ -497,6 +511,12 @@ std::vector<llama_token> llama_tokenize(
         bool add_special,
         bool parse_special = false);
 
+std::vector<llama_token> llama_tokenize(
+        const struct llama_vocab * vocab,
+        const std::string & text,
+        bool add_special,
+        bool parse_special = false);
+
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
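
The new overload tokenizes against a llama_vocab directly, so no llama_context is required. A hedged sketch; how the vocab pointer is obtained from a loaded model is an assumption here, and the accessor name below may differ in this codebase:

    static llama_tokens tokenize_with_vocab(const struct llama_model * model, const std::string & text) {
        const struct llama_vocab * vocab = llama_model_get_vocab(model); // assumed accessor; verify against the API
        return llama_tokenize(vocab, text, /*add_special=*/true);
    }
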
@@ -513,70 +533,16 @@ std::string llama_token_to_piece(
 // should work similar to Python's `tokenizer.decode`
 // optionally renders special/control tokens
 std::string llama_detokenize(
-        llama_context * ctx,
+        const llama_context * ctx,
         const std::vector<llama_token> & tokens,
         bool special = true);
 
+
 // Uses the value from the model metadata if possible, otherwise
 // defaults to true when model type is SPM, otherwise false.
 bool llama_should_add_bos_token(const llama_model * model);
 
-//
-// Chat template utils
-//
-// struct common_tool_call {
-//     std::string name;
-//     std::string arguments;
-//     std::string id;
-// };
-//
-// // same with llama_chat_message, but uses std::string
-// struct common_chat_msg {
-//     std::string role;
-//     std::string content;
-//     std::vector<common_tool_call> tool_calls;
-//     std::string reasoning_content = "";
-// };
-
-// // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
-// bool llama_chat_verify_template(const struct llama_model *, const std::string & tmpl, bool use_jinja);
-//
-// namespace minja {
-//     class chat_template;
-// }
-//
-// typedef minja::chat_template common_chat_template;
-//
-// struct common_chat_templates {
-//     bool has_explicit_template; // Model had builtin template or template override was specified.
-//     std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
-//     std::unique_ptr<common_chat_template> template_tool_use;
-// };
-//
-//
-// // CPP wrapper for llama_chat_apply_template
-// // If the built-in template is not supported, we default to chatml
-// // If the custom "tmpl" is not supported, we throw an error
-// std::string llama_chat_apply_template(
-//     const struct llama_model * model,
-//     const common_chat_template & tmpl,
-//     const std::vector<common_chat_msg> & chat,
-//     bool add_ass,
-//     bool use_jinja);
-//
-// // Format single message, while taking into account the position of that message in chat history
-// std::string llama_chat_format_single(const struct llama_model * model,
-//     const common_chat_template & tmpl,
-//     const std::vector<common_chat_msg> & past_msg,
-//     const common_chat_msg & new_msg,
-//     bool add_ass,
-//     bool use_jinja);
-//
-// // Returns an example of formatted chat
-// std::string llama_chat_format_example(const struct llama_model * model,
-//     const common_chat_template & tmpl, bool use_jinja);
-//
-// common_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
+
 
 
 //
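
A small round-trip sketch combining the helpers touched above: tokenize, then decode with the now const-qualified llama_detokenize. The wrapper name is illustrative and the llama_context is assumed to be initialized elsewhere:

    static std::string round_trip(llama_context * ctx, const std::string & text) {
        const bool add_bos  = llama_should_add_bos_token(llama_get_model(ctx)); // follow the model metadata
        llama_tokens tokens = llama_tokenize(ctx, text, /*add_special=*/add_bos);
        return llama_detokenize(ctx, tokens, /*special=*/true); // decode back, rendering special tokens
    }
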