@@ -1953,3 +1953,213 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
19531953 return result;
19541954}
19551955
1956+ //
1957+ // YAML utils
1958+ //
1959+
// Emit a float vector as a YAML flow sequence: "name: [v0, v1, ...]".
// An empty vector is written as "name:" with no value. Values use %e notation.
void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data) {
    if (data.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    fprintf(stream, "%s: [", prop_name);
    const char * sep = "";
    for (const float value : data) {
        fprintf(stream, "%s%e", sep, value);
        sep = ", ";
    }
    fprintf(stream, "]\n");
}
1972+
// Emit an int vector as a YAML flow sequence: "name: [v0, v1, ...]".
// An empty vector is written as "name:" with no value.
void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data) {
    if (data.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    fprintf(stream, "%s: [", prop_name);
    const char * sep = "";
    for (const int value : data) {
        fprintf(stream, "%s%d", sep, value);
        sep = ", ";
    }
    fprintf(stream, "]\n");
}
1985+
// Emit a string as a YAML scalar under the given property name, choosing a
// representation that round-trips through a YAML parser:
//   - NULL/empty      -> "name:" with no value
//   - leading/trailing whitespace -> double-quoted scalar with escapes
//     (a plain or block scalar would lose the surrounding whitespace)
//   - single line     -> plain scalar "name: value"
//   - multi-line      -> block literal "name: |" with two-space indent
void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data) {
    std::string data_str(data == NULL ? "" : data);

    if (data_str.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    // fix: cast through unsigned char — std::isspace has undefined behavior for
    // negative char values (any non-ASCII UTF-8 byte on signed-char platforms)
    if (std::isspace(static_cast<unsigned char>(data_str[0])) ||
        std::isspace(static_cast<unsigned char>(data_str.back()))) {
        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
        data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
        data_str = "\"" + data_str + "\"";
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;
    }

    if (data_str.find('\n') == std::string::npos) {
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;
    }

    fprintf(stream, "%s: |\n", prop_name);
    size_t pos_start = 0;
    size_t pos_found = 0;
    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found - pos_start).c_str());
        pos_start = pos_found + 1;
    }
    // fix: the original dropped any text after the last '\n' ("a\nb" lost "b")
    if (pos_start < data_str.size()) {
        fprintf(stream, "  %s\n", data_str.substr(pos_start).c_str());
    }
}
2017+
2018+ void yaml_dump_non_result_info (FILE * stream, const common_params & params, const llama_context * lctx,
2019+ const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model_desc) {
2020+ ggml_cpu_init (); // some ARM features are detected at runtime
2021+
2022+ const auto & sparams = params.sampling ;
2023+
2024+ fprintf (stream, " build_commit: %s\n " , LLAMA_COMMIT);
2025+ fprintf (stream, " build_number: %d\n " , LLAMA_BUILD_NUMBER);
2026+ fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
2027+ fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
2028+ fprintf (stream, " cpu_has_avx_vnni: %s\n " , ggml_cpu_has_avx_vnni () ? " true" : " false" );
2029+ fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
2030+ fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
2031+ fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
2032+ fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
2033+ fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
2034+ fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
2035+ fprintf (stream, " cpu_has_sve: %s\n " , ggml_cpu_has_sve () ? " true" : " false" );
2036+ fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
2037+ fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
2038+ fprintf (stream, " cpu_has_riscv_v: %s\n " , ggml_cpu_has_riscv_v () ? " true" : " false" );
2039+ fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
2040+ fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
2041+ fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
2042+ fprintf (stream, " cpu_has_matmul_int8: %s\n " , ggml_cpu_has_matmul_int8 () ? " true" : " false" );
2043+
2044+ #ifdef NDEBUG
2045+ fprintf (stream, " debug: false\n " );
2046+ #else
2047+ fprintf (stream, " debug: true\n " );
2048+ #endif // NDEBUG
2049+
2050+ fprintf (stream, " model_desc: %s\n " , model_desc);
2051+ fprintf (stream, " n_vocab: %d # output size of the final layer, 32001 for some models\n " , llama_n_vocab (llama_get_model (lctx)));
2052+
2053+ #ifdef __OPTIMIZE__
2054+ fprintf (stream, " optimize: true\n " );
2055+ #else
2056+ fprintf (stream, " optimize: false\n " );
2057+ #endif // __OPTIMIZE__
2058+
2059+ fprintf (stream, " time: %s\n " , timestamp.c_str ());
2060+
2061+ fprintf (stream, " \n " );
2062+ fprintf (stream, " ###############\n " );
2063+ fprintf (stream, " # User Inputs #\n " );
2064+ fprintf (stream, " ###############\n " );
2065+ fprintf (stream, " \n " );
2066+
2067+ fprintf (stream, " alias: %s # default: unknown\n " , params.model_alias .c_str ());
2068+ fprintf (stream, " batch_size: %d # default: 512\n " , params.n_batch );
2069+ fprintf (stream, " chunks: %d # default: -1 (unlimited)\n " , params.n_chunks );
2070+ fprintf (stream, " color: %s # default: false\n " , params.use_color ? " true" : " false" );
2071+ fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
2072+ fprintf (stream, " dry_allowed_length: %d # default: 2\n " , sparams.dry_allowed_length );
2073+ fprintf (stream, " dry_base: %.2f # default: 1.75\n " , sparams.dry_base );
2074+ fprintf (stream, " dry_multiplier: %.1f # default: 0.0\n " , sparams.dry_multiplier );
2075+ fprintf (stream, " dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n " , sparams.dry_penalty_last_n );
2076+ fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
2077+ fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
2078+ fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.penalty_freq );
2079+ yaml_dump_string_multiline (stream, " grammar" , sparams.grammar .c_str ());
2080+ fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
2081+ fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
2082+ fprintf (stream, " hellaswag_tasks: %zu # default: 400\n " , params.hellaswag_tasks );
2083+ fprintf (stream, " ignore_eos: %s # default: false\n " , sparams.ignore_eos ? " true" : " false" );
2084+
2085+ yaml_dump_string_multiline (stream, " in_prefix" , params.input_prefix .c_str ());
2086+ fprintf (stream, " in_prefix_bos: %s # default: false\n " , params.input_prefix_bos ? " true" : " false" );
2087+ yaml_dump_string_multiline (stream, " in_suffix" , params.input_prefix .c_str ());
2088+ fprintf (stream, " interactive: %s # default: false\n " , params.interactive ? " true" : " false" );
2089+ fprintf (stream, " interactive_first: %s # default: false\n " , params.interactive_first ? " true" : " false" );
2090+ fprintf (stream, " keep: %d # default: 0\n " , params.n_keep );
2091+ // fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str());
2092+
2093+ fprintf (stream, " logit_bias:\n " );
2094+ for (const auto & logit_bias : sparams.logit_bias ) {
2095+ fprintf (stream, " %d: %f" , logit_bias.token , logit_bias.bias );
2096+ }
2097+
2098+ fprintf (stream, " lora:\n " );
2099+ for (auto & la : params.lora_adapters ) {
2100+ if (la.scale == 1 .0f ) {
2101+ fprintf (stream, " - %s\n " , la.path .c_str ());
2102+ }
2103+ }
2104+ fprintf (stream, " lora_scaled:\n " );
2105+ for (auto & la : params.lora_adapters ) {
2106+ if (la.scale != 1 .0f ) {
2107+ fprintf (stream, " - %s: %f\n " , la.path .c_str (), la.scale );
2108+ }
2109+ }
2110+ fprintf (stream, " lora_init_without_apply: %s # default: false\n " , params.lora_init_without_apply ? " true" : " false" );
2111+ fprintf (stream, " main_gpu: %d # default: 0\n " , params.main_gpu );
2112+ fprintf (stream, " min_keep: %d # default: 0 (disabled)\n " , sparams.min_keep );
2113+ fprintf (stream, " mirostat: %d # default: 0 (disabled)\n " , sparams.mirostat );
2114+ fprintf (stream, " mirostat_ent: %f # default: 5.0\n " , sparams.mirostat_tau );
2115+ fprintf (stream, " mirostat_lr: %f # default: 0.1\n " , sparams.mirostat_eta );
2116+ fprintf (stream, " mlock: %s # default: false\n " , params.use_mlock ? " true" : " false" );
2117+ fprintf (stream, " model: %s # default: %s\n " , params.model .c_str (), DEFAULT_MODEL_PATH);
2118+ // fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
2119+ fprintf (stream, " multiline_input: %s # default: false\n " , params.multiline_input ? " true" : " false" );
2120+ fprintf (stream, " n_gpu_layers: %d # default: -1\n " , params.n_gpu_layers );
2121+ fprintf (stream, " n_predict: %d # default: -1 (unlimited)\n " , params.n_predict );
2122+ fprintf (stream, " n_probs: %d # only used by server binary, default: 0\n " , sparams.n_probs );
2123+ fprintf (stream, " no_mmap: %s # default: false\n " , !params.use_mmap ? " true" : " false" );
2124+ fprintf (stream, " penalize_nl: %s # default: false\n " , sparams.penalize_nl ? " true" : " false" );
2125+ fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
2126+ fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
2127+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
2128+ yaml_dump_string_multiline (stream, " prompt" , params.prompt .c_str ());
2129+ fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
2130+ fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
2131+ fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
2132+ yaml_dump_vector_int (stream, " prompt_tokens" , prompt_tokens);
2133+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
2134+
2135+ fprintf (stream, " reverse_prompt:\n " );
2136+ for (std::string ap : params.antiprompt ) {
2137+ size_t pos = 0 ;
2138+ while ((pos = ap.find (' \n ' , pos)) != std::string::npos) {
2139+ ap.replace (pos, 1 , " \\ n" );
2140+ pos += 1 ;
2141+ }
2142+
2143+ fprintf (stream, " - %s\n " , ap.c_str ());
2144+ }
2145+
2146+ fprintf (stream, " rope_freq_base: %f # default: 10000.0\n " , params.rope_freq_base );
2147+ fprintf (stream, " rope_freq_scale: %f # default: 1.0\n " , params.rope_freq_scale );
2148+ fprintf (stream, " simple_io: %s # default: false\n " , params.simple_io ? " true" : " false" );
2149+ fprintf (stream, " cont_batching: %s # default: false\n " , params.cont_batching ? " true" : " false" );
2150+ fprintf (stream, " flash_attn: %s # default: false\n " , params.flash_attn ? " true" : " false" );
2151+ fprintf (stream, " temp: %f # default: 0.8\n " , sparams.temp );
2152+
2153+ const std::vector<float > tensor_split_vector (params.tensor_split , params.tensor_split + llama_max_devices ());
2154+ yaml_dump_vector_float (stream, " tensor_split" , tensor_split_vector);
2155+
2156+ fprintf (stream, " threads: %d # default: %u\n " , params.cpuparams .n_threads , std::thread::hardware_concurrency ());
2157+ fprintf (stream, " top_k: %d # default: 40\n " , sparams.top_k );
2158+ fprintf (stream, " top_p: %f # default: 0.95\n " , sparams.top_p );
2159+ fprintf (stream, " min_p: %f # default: 0.0\n " , sparams.min_p );
2160+ fprintf (stream, " xtc_probability: %f # default: 0.0\n " , sparams.xtc_probability );
2161+ fprintf (stream, " xtc_threshold: %f # default: 0.1\n " , sparams.xtc_threshold );
2162+ fprintf (stream, " typ_p: %f # default: 1.0\n " , sparams.typ_p );
2163+ fprintf (stream, " verbose_prompt: %s # default: false\n " , params.verbose_prompt ? " true" : " false" );
2164+ fprintf (stream, " display_prompt: %s # default: true\n " , params.display_prompt ? " true" : " false" );
2165+ }
0 commit comments