@@ -1952,214 +1952,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 
     return result;
 }
-
-//
-// YAML utils
-//
-
-void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data) {
-    if (data.empty()) {
-        fprintf(stream, "%s:\n", prop_name);
-        return;
-    }
-
-    fprintf(stream, "%s: [", prop_name);
-    for (size_t i = 0; i < data.size() - 1; ++i) {
-        fprintf(stream, "%e, ", data[i]);
-    }
-    fprintf(stream, "%e]\n", data.back());
-}
-
-void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data) {
-    if (data.empty()) {
-        fprintf(stream, "%s:\n", prop_name);
-        return;
-    }
-
-    fprintf(stream, "%s: [", prop_name);
-    for (size_t i = 0; i < data.size() - 1; ++i) {
-        fprintf(stream, "%d, ", data[i]);
-    }
-    fprintf(stream, "%d]\n", data.back());
-}
-
-void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data) {
-    std::string data_str(data == NULL ? "" : data);
-
-    if (data_str.empty()) {
-        fprintf(stream, "%s:\n", prop_name);
-        return;
-    }
-
-    size_t pos_start = 0;
-    size_t pos_found = 0;
-
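-    // values with leading or trailing whitespace are emitted as a double-quoted scalar,
-    // with newlines, quotes and backslashes escaped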
-    if (std::isspace(data_str[0]) || std::isspace(data_str.back())) {
-        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
-        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
-        data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
-        data_str = "\"" + data_str + "\"";
-        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
-        return;
-    }
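-    // otherwise: single-line values are printed inline, multi-line values as a YAML literal block ("|")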
-
-    if (data_str.find('\n') == std::string::npos) {
-        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
-        return;
-    }
-
-    fprintf(stream, "%s: |\n", prop_name);
-    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
-        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found - pos_start).c_str());
-        pos_start = pos_found + 1;
-    }
-}
-
-void yaml_dump_non_result_info(FILE * stream, const common_params & params, const llama_context * lctx,
-                               const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc) {
-    ggml_cpu_init(); // some ARM features are detected at runtime
-
-    const auto & sparams = params.sampling;
-
-    fprintf(stream, "build_commit: %s\n", LLAMA_COMMIT);
-    fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER);
-    fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx_vnni: %s\n", ggml_cpu_has_avx_vnni() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
-    fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
-    fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
-    fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
-    fprintf(stream, "cpu_has_sve: %s\n", ggml_cpu_has_sve() ? "true" : "false");
-    fprintf(stream, "cpu_has_f16c: %s\n", ggml_cpu_has_f16c() ? "true" : "false");
-    fprintf(stream, "cpu_has_fp16_va: %s\n", ggml_cpu_has_fp16_va() ? "true" : "false");
-    fprintf(stream, "cpu_has_riscv_v: %s\n", ggml_cpu_has_riscv_v() ? "true" : "false");
-    fprintf(stream, "cpu_has_wasm_simd: %s\n", ggml_cpu_has_wasm_simd() ? "true" : "false");
-    fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? "true" : "false");
-    fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false");
-    fprintf(stream, "cpu_has_matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "true" : "false");
-
-#ifdef NDEBUG
-    fprintf(stream, "debug: false\n");
-#else
-    fprintf(stream, "debug: true\n");
-#endif // NDEBUG
-
-    fprintf(stream, "model_desc: %s\n", model_desc);
-    fprintf(stream, "n_vocab: %d  # output size of the final layer, 32001 for some models\n", llama_n_vocab(llama_get_model(lctx)));
-
-#ifdef __OPTIMIZE__
-    fprintf(stream, "optimize: true\n");
-#else
-    fprintf(stream, "optimize: false\n");
-#endif // __OPTIMIZE__
-
-    fprintf(stream, "time: %s\n", timestamp.c_str());
-
-    fprintf(stream, "\n");
-    fprintf(stream, "###############\n");
-    fprintf(stream, "# User Inputs #\n");
-    fprintf(stream, "###############\n");
-    fprintf(stream, "\n");
-
-    fprintf(stream, "alias: %s # default: unknown\n", params.model_alias.c_str());
-    fprintf(stream, "batch_size: %d # default: 512\n", params.n_batch);
-    fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks);
-    fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false");
-    fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
-    fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
-    fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
-    fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
-    fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
-    fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
-    fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
-    fprintf(stream, "frequency_penalty: %f # default: 0.0\n", sparams.penalty_freq);
-    yaml_dump_string_multiline(stream, "grammar", sparams.grammar.c_str());
-    fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
-    fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
-    fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
-    fprintf(stream, "ignore_eos: %s # default: false\n", sparams.ignore_eos ? "true" : "false");
-
-    yaml_dump_string_multiline(stream, "in_prefix", params.input_prefix.c_str());
-    fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? "true" : "false");
-    yaml_dump_string_multiline(stream, "in_suffix", params.input_suffix.c_str());
-    fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
-    fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
-    fprintf(stream, "keep: %d # default: 0\n", params.n_keep);
-    // fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str());
-
-    fprintf(stream, "logit_bias:\n");
-    for (const auto & logit_bias : sparams.logit_bias) {
-        fprintf(stream, "  %d: %f\n", logit_bias.token, logit_bias.bias);
-    }
-
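-    // adapters with the default scale of 1.0 are listed under "lora:", scaled adapters under "lora_scaled:"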
-    fprintf(stream, "lora:\n");
-    for (auto & la : params.lora_adapters) {
-        if (la.scale == 1.0f) {
-            fprintf(stream, "  - %s\n", la.path.c_str());
-        }
-    }
-    fprintf(stream, "lora_scaled:\n");
-    for (auto & la : params.lora_adapters) {
-        if (la.scale != 1.0f) {
-            fprintf(stream, "  - %s: %f\n", la.path.c_str(), la.scale);
-        }
-    }
-    fprintf(stream, "lora_init_without_apply: %s # default: false\n", params.lora_init_without_apply ? "true" : "false");
-    fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu);
-    fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep);
-    fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat);
-    fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau);
-    fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta);
-    fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false");
-    fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH);
-    // fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
-    fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false");
-    fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers);
-    fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict);
-    fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs);
-    fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false");
-    fprintf(stream, "penalize_nl: %s # default: false\n", sparams.penalize_nl ? "true" : "false");
-    fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type);
-    fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride);
-    fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present);
-    yaml_dump_string_multiline(stream, "prompt", params.prompt.c_str());
-    fprintf(stream, "prompt_cache: %s\n", params.path_prompt_cache.c_str());
-    fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false");
-    fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? "true" : "false");
-    yaml_dump_vector_int(stream, "prompt_tokens", prompt_tokens);
-    fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat);
-
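-    // anti-prompts may contain newlines; escape them so each entry stays on a single YAML list line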
-    fprintf(stream, "reverse_prompt:\n");
-    for (std::string ap : params.antiprompt) {
-        size_t pos = 0;
-        while ((pos = ap.find('\n', pos)) != std::string::npos) {
-            ap.replace(pos, 1, "\\n");
-            pos += 1;
-        }
-
-        fprintf(stream, "  - %s\n", ap.c_str());
-    }
-
-    fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base);
-    fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale);
-    fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
-    fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
-    fprintf(stream, "flash_attn: %s # default: false\n", params.flash_attn ? "true" : "false");
-    fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp);
-
-    const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
-    yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
-
-    fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
-    fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
-    fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);
-    fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
-    fprintf(stream, "xtc_probability: %f # default: 0.0\n", sparams.xtc_probability);
-    fprintf(stream, "xtc_threshold: %f # default: 0.1\n", sparams.xtc_threshold);
-    fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p);
-    fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
-    fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
-}