@@ -1959,3 +1959,213 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
     return result;
 }
 
+//
+// YAML utils
+//
+
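+// Writes a float vector as a YAML flow sequence, e.g. "foo: [1.000000e+00, 2.000000e+00]";
+// an empty vector prints just the bare key.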
+void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data) {
+    if (data.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    fprintf(stream, "%s: [", prop_name);
+    for (size_t i = 0; i < data.size() - 1; ++i) {
+        fprintf(stream, "%e, ", data[i]);
+    }
+    fprintf(stream, "%e]\n", data.back());
+}
+
+void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data) {
+    if (data.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    fprintf(stream, "%s: [", prop_name);
+    for (size_t i = 0; i < data.size() - 1; ++i) {
+        fprintf(stream, "%d, ", data[i]);
+    }
+    fprintf(stream, "%d]\n", data.back());
+}
+
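+// Writes a string value in the most readable YAML form:
+//  - empty input                  -> bare key ("prop:")
+//  - leading/trailing whitespace  -> double-quoted scalar with \n, \" and backslashes escaped
+//  - single line                  -> plain scalar
+//  - multiple lines               -> block literal ("prop: |") with each line indented by two spaces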
+void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data) {
+    std::string data_str(data == NULL ? "" : data);
+
+    if (data_str.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    size_t pos_start = 0;
+    size_t pos_found = 0;
+
+    if (std::isspace(data_str[0]) || std::isspace(data_str.back())) {
+        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
+        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
+        data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
+        data_str = "\"" + data_str + "\"";
+        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
+        return;
+    }
+
+    if (data_str.find('\n') == std::string::npos) {
+        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
+        return;
+    }
+
+    fprintf(stream, "%s: |\n", prop_name);
+    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
+        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found - pos_start).c_str());
+        pos_start = pos_found + 1;
+    }
+}
+
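+// Dumps everything except generation results as YAML: build info and detected CPU features first,
+// then the user-supplied parameters under a "User Inputs" banner.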
+void yaml_dump_non_result_info(FILE * stream, const common_params & params, const llama_context * lctx,
+                               const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc) {
+    ggml_cpu_init(); // some ARM features are detected at runtime
+
+    const auto & sparams = params.sparams;
+
+    fprintf(stream, "build_commit: %s\n",        LLAMA_COMMIT);
+    fprintf(stream, "build_number: %d\n",        LLAMA_BUILD_NUMBER);
+    fprintf(stream, "cpu_has_arm_fma: %s\n",     ggml_cpu_has_arm_fma()     ? "true" : "false");
+    fprintf(stream, "cpu_has_avx: %s\n",         ggml_cpu_has_avx()         ? "true" : "false");
+    fprintf(stream, "cpu_has_avx_vnni: %s\n",    ggml_cpu_has_avx_vnni()    ? "true" : "false");
+    fprintf(stream, "cpu_has_avx2: %s\n",        ggml_cpu_has_avx2()        ? "true" : "false");
+    fprintf(stream, "cpu_has_avx512: %s\n",      ggml_cpu_has_avx512()      ? "true" : "false");
+    fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false");
+    fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
+    fprintf(stream, "cpu_has_fma: %s\n",         ggml_cpu_has_fma()         ? "true" : "false");
+    fprintf(stream, "cpu_has_neon: %s\n",        ggml_cpu_has_neon()        ? "true" : "false");
+    fprintf(stream, "cpu_has_sve: %s\n",         ggml_cpu_has_sve()         ? "true" : "false");
+    fprintf(stream, "cpu_has_f16c: %s\n",        ggml_cpu_has_f16c()        ? "true" : "false");
+    fprintf(stream, "cpu_has_fp16_va: %s\n",     ggml_cpu_has_fp16_va()     ? "true" : "false");
+    fprintf(stream, "cpu_has_riscv_v: %s\n",     ggml_cpu_has_riscv_v()     ? "true" : "false");
+    fprintf(stream, "cpu_has_wasm_simd: %s\n",   ggml_cpu_has_wasm_simd()   ? "true" : "false");
+    fprintf(stream, "cpu_has_sse3: %s\n",        ggml_cpu_has_sse3()        ? "true" : "false");
+    fprintf(stream, "cpu_has_vsx: %s\n",         ggml_cpu_has_vsx()         ? "true" : "false");
+    fprintf(stream, "cpu_has_matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "true" : "false");
+
+#ifdef NDEBUG
+    fprintf(stream, "debug: false\n");
+#else
+    fprintf(stream, "debug: true\n");
+#endif // NDEBUG
+
+    fprintf(stream, "model_desc: %s\n", model_desc);
+    fprintf(stream, "n_vocab: %d # output size of the final layer, 32001 for some models\n", llama_n_vocab(llama_get_model(lctx)));
+
+#ifdef __OPTIMIZE__
+    fprintf(stream, "optimize: true\n");
+#else
+    fprintf(stream, "optimize: false\n");
+#endif // __OPTIMIZE__
+
+    fprintf(stream, "time: %s\n", timestamp.c_str());
+
+    fprintf(stream, "\n");
+    fprintf(stream, "###############\n");
+    fprintf(stream, "# User Inputs #\n");
+    fprintf(stream, "###############\n");
+    fprintf(stream, "\n");
+
+    fprintf(stream, "alias: %s # default: unknown\n", params.model_alias.c_str());
+    fprintf(stream, "batch_size: %d # default: 512\n", params.n_batch);
+    fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks);
+    fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false");
+    fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx);
+    fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
+    fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
+    fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
+    fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
+    fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
+    fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
+    fprintf(stream, "frequency_penalty: %f # default: 0.0\n", sparams.penalty_freq);
+    yaml_dump_string_multiline(stream, "grammar", sparams.grammar.c_str());
+    fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
+    fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
+    fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
+    fprintf(stream, "ignore_eos: %s # default: false\n", sparams.ignore_eos ? "true" : "false");
+
2091+ yaml_dump_string_multiline (stream, " in_prefix" , params.input_prefix .c_str ());
2092+ fprintf (stream, " in_prefix_bos: %s # default: false\n " , params.input_prefix_bos ? " true" : " false" );
2093+ yaml_dump_string_multiline (stream, " in_suffix" , params.input_prefix .c_str ());
2094+ fprintf (stream, " interactive: %s # default: false\n " , params.interactive ? " true" : " false" );
2095+ fprintf (stream, " interactive_first: %s # default: false\n " , params.interactive_first ? " true" : " false" );
2096+ fprintf (stream, " keep: %d # default: 0\n " , params.n_keep );
2097+ fprintf (stream, " logdir: %s # default: unset (no logging)\n " , params.logdir .c_str ());
2098+
2099+ fprintf (stream, " logit_bias:\n " );
2100+ for (const auto & logit_bias : sparams.logit_bias ) {
2101+ fprintf (stream, " %d: %f" , logit_bias.token , logit_bias.bias );
2102+ }
2103+
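+    // adapters kept at the default scale of 1.0 are listed under "lora:",
+    // any other scale goes under "lora_scaled:" as "path: scale"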
2104+ fprintf (stream, " lora:\n " );
2105+ for (auto & la : params.lora_adapters ) {
2106+ if (la.scale == 1 .0f ) {
2107+ fprintf (stream, " - %s\n " , la.path .c_str ());
2108+ }
2109+ }
2110+ fprintf (stream, " lora_scaled:\n " );
2111+ for (auto & la : params.lora_adapters ) {
2112+ if (la.scale != 1 .0f ) {
2113+ fprintf (stream, " - %s: %f\n " , la.path .c_str (), la.scale );
2114+ }
2115+ }
2116+ fprintf (stream, " lora_init_without_apply: %s # default: false\n " , params.lora_init_without_apply ? " true" : " false" );
2117+ fprintf (stream, " main_gpu: %d # default: 0\n " , params.main_gpu );
2118+ fprintf (stream, " min_keep: %d # default: 0 (disabled)\n " , sparams.min_keep );
2119+ fprintf (stream, " mirostat: %d # default: 0 (disabled)\n " , sparams.mirostat );
2120+ fprintf (stream, " mirostat_ent: %f # default: 5.0\n " , sparams.mirostat_tau );
2121+ fprintf (stream, " mirostat_lr: %f # default: 0.1\n " , sparams.mirostat_eta );
2122+ fprintf (stream, " mlock: %s # default: false\n " , params.use_mlock ? " true" : " false" );
2123+ fprintf (stream, " model: %s # default: %s\n " , params.model .c_str (), DEFAULT_MODEL_PATH);
2124+ fprintf (stream, " model_draft: %s # default:\n " , params.model_draft .c_str ());
2125+ fprintf (stream, " multiline_input: %s # default: false\n " , params.multiline_input ? " true" : " false" );
2126+ fprintf (stream, " n_gpu_layers: %d # default: -1\n " , params.n_gpu_layers );
2127+ fprintf (stream, " n_predict: %d # default: -1 (unlimited)\n " , params.n_predict );
2128+ fprintf (stream, " n_probs: %d # only used by server binary, default: 0\n " , sparams.n_probs );
2129+ fprintf (stream, " no_mmap: %s # default: false\n " , !params.use_mmap ? " true" : " false" );
2130+ fprintf (stream, " penalize_nl: %s # default: false\n " , sparams.penalize_nl ? " true" : " false" );
2131+ fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
2132+ fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
2133+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
2134+ yaml_dump_string_multiline (stream, " prompt" , params.prompt .c_str ());
2135+ fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
2136+ fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
2137+ fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
2138+ yaml_dump_vector_int (stream, " prompt_tokens" , prompt_tokens);
2139+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
2140+
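+    // escape embedded newlines so each reverse prompt stays on a single YAML list line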
2141+ fprintf (stream, " reverse_prompt:\n " );
2142+ for (std::string ap : params.antiprompt ) {
2143+ size_t pos = 0 ;
2144+ while ((pos = ap.find (' \n ' , pos)) != std::string::npos) {
2145+ ap.replace (pos, 1 , " \\ n" );
2146+ pos += 1 ;
2147+ }
2148+
2149+ fprintf (stream, " - %s\n " , ap.c_str ());
2150+ }
2151+
2152+ fprintf (stream, " rope_freq_base: %f # default: 10000.0\n " , params.rope_freq_base );
2153+ fprintf (stream, " rope_freq_scale: %f # default: 1.0\n " , params.rope_freq_scale );
2154+ fprintf (stream, " simple_io: %s # default: false\n " , params.simple_io ? " true" : " false" );
2155+ fprintf (stream, " cont_batching: %s # default: false\n " , params.cont_batching ? " true" : " false" );
2156+ fprintf (stream, " flash_attn: %s # default: false\n " , params.flash_attn ? " true" : " false" );
2157+ fprintf (stream, " temp: %f # default: 0.8\n " , sparams.temp );
2158+
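+    // tensor_split is a fixed-size C array; copy llama_max_devices() entries into a vector
+    // so it can be dumped as a YAML sequence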
+    const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
+    yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);
+
+    fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
+    fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
+    fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);
+    fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
+    fprintf(stream, "xtc_probability: %f # default: 0.0\n", sparams.xtc_probability);
+    fprintf(stream, "xtc_threshold: %f # default: 0.1\n", sparams.xtc_threshold);
+    fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p);
+    fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
+    fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
+}