@@ -1890,213 +1890,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
18901890 return result;
18911891}
18921892
1893- //
1894- // YAML utils
1895- //
1896-
// Writes a float vector to `stream` as a single-line YAML flow sequence:
//
//     prop_name: [1.000000e+00, 5.000000e-01]
//
// Every element is formatted with "%e". An empty vector is written as the bare
// key (`prop_name:`) with no value.
//
// stream    - destination, must be open for writing
// prop_name - YAML key, written verbatim (not escaped)
// data      - values to dump
void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data) {
    if (data.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    fprintf(stream, "%s: [", prop_name);

    // the separator is empty for the first element and ", " afterwards, so no
    // special handling of the last element is required
    const char * separator = "";
    for (const float value : data) {
        fprintf(stream, "%s%e", separator, value);
        separator = ", ";
    }

    fprintf(stream, "]\n");
}
1909-
// Writes an int vector to `stream` as a single-line YAML flow sequence:
//
//     prop_name: [1, 2, 3]
//
// An empty vector is written as the bare key (`prop_name:`) with no value.
//
// stream    - destination, must be open for writing
// prop_name - YAML key, written verbatim (not escaped)
// data      - values to dump
void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data) {
    if (data.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    fprintf(stream, "%s: [", prop_name);

    // the separator is empty for the first element and ", " afterwards, so no
    // special handling of the last element is required
    const char * separator = "";
    for (const int value : data) {
        fprintf(stream, "%s%d", separator, value);
        separator = ", ";
    }

    fprintf(stream, "]\n");
}
1922-
// Writes a string property to `stream`, choosing the YAML representation from
// the content of `data`:
//
//   - NULL or empty                   -> `prop_name:` (bare key, no value)
//   - starts or ends with whitespace  -> double-quoted scalar on one line, with
//                                        newline, '"' and '\' escaped
//   - contains no newline             -> plain scalar: `prop_name: value`
//   - otherwise                       -> literal block scalar (`|`), one
//                                        two-space indented line per input line
//
// stream    - destination, must be open for writing
// prop_name - YAML key, written verbatim (not escaped)
// data      - NUL-terminated string to dump, may be NULL
void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data) {
    const std::string data_str(data == NULL ? "" : data);

    if (data_str.empty()) {
        fprintf(stream, "%s:\n", prop_name);
        return;
    }

    // std::isspace has undefined behavior for negative char values, so go through unsigned char
    const auto is_space = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };

    if (is_space(data_str.front()) || is_space(data_str.back())) {
        // leading/trailing whitespace is only preserved inside a quoted scalar;
        // escape in a single pass so that every backslash is doubled exactly once
        std::string quoted;
        quoted.reserve(data_str.size() + 2);
        quoted += '"';
        for (const char c : data_str) {
            switch (c) {
                case '\n': quoted += "\\n";  break;
                case '"':  quoted += "\\\""; break;
                case '\\': quoted += "\\\\"; break;
                default:   quoted += c;      break;
            }
        }
        quoted += '"';

        fprintf(stream, "%s: %s\n", prop_name, quoted.c_str());
        return;
    }

    if (data_str.find('\n') == std::string::npos) {
        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
        return;
    }

    // NOTE: `|` uses the default chomping, so a YAML reader sees one trailing newline
    fprintf(stream, "%s: |\n", prop_name);

    size_t pos_start = 0;
    size_t pos_found = 0;
    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found - pos_start).c_str());
        pos_start = pos_found + 1;
    }

    // the string cannot end with '\n' here (that case took the quoted branch),
    // so there is always a final line left after the last newline
    fprintf(stream, "  %s\n", data_str.c_str() + pos_start);
}
1954-
1955- void yaml_dump_non_result_info (FILE * stream, const common_params & params, const llama_context * lctx,
1956- const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model_desc) {
1957- ggml_cpu_init (); // some ARM features are detected at runtime
1958-
1959- const auto & sparams = params.sparams ;
1960-
1961- fprintf (stream, " build_commit: %s\n " , LLAMA_COMMIT);
1962- fprintf (stream, " build_number: %d\n " , LLAMA_BUILD_NUMBER);
1963- fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1964- fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1965- fprintf (stream, " cpu_has_avx_vnni: %s\n " , ggml_cpu_has_avx_vnni () ? " true" : " false" );
1966- fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1967- fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
1968- fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
1969- fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
1970- fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1971- fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1972- fprintf (stream, " cpu_has_sve: %s\n " , ggml_cpu_has_sve () ? " true" : " false" );
1973- fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1974- fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1975- fprintf (stream, " cpu_has_riscv_v: %s\n " , ggml_cpu_has_riscv_v () ? " true" : " false" );
1976- fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1977- fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1978- fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
1979- fprintf (stream, " cpu_has_matmul_int8: %s\n " , ggml_cpu_has_matmul_int8 () ? " true" : " false" );
1980-
1981- #ifdef NDEBUG
1982- fprintf (stream, " debug: false\n " );
1983- #else
1984- fprintf (stream, " debug: true\n " );
1985- #endif // NDEBUG
1986-
1987- fprintf (stream, " model_desc: %s\n " , model_desc);
1988- fprintf (stream, " n_vocab: %d # output size of the final layer, 32001 for some models\n " , llama_n_vocab (llama_get_model (lctx)));
1989-
1990- #ifdef __OPTIMIZE__
1991- fprintf (stream, " optimize: true\n " );
1992- #else
1993- fprintf (stream, " optimize: false\n " );
1994- #endif // __OPTIMIZE__
1995-
1996- fprintf (stream, " time: %s\n " , timestamp.c_str ());
1997-
1998- fprintf (stream, " \n " );
1999- fprintf (stream, " ###############\n " );
2000- fprintf (stream, " # User Inputs #\n " );
2001- fprintf (stream, " ###############\n " );
2002- fprintf (stream, " \n " );
2003-
2004- fprintf (stream, " alias: %s # default: unknown\n " , params.model_alias .c_str ());
2005- fprintf (stream, " batch_size: %d # default: 512\n " , params.n_batch );
2006- fprintf (stream, " chunks: %d # default: -1 (unlimited)\n " , params.n_chunks );
2007- fprintf (stream, " color: %s # default: false\n " , params.use_color ? " true" : " false" );
2008- fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
2009- fprintf (stream, " dry_allowed_length: %d # default: 2\n " , sparams.dry_allowed_length );
2010- fprintf (stream, " dry_base: %.2f # default: 1.75\n " , sparams.dry_base );
2011- fprintf (stream, " dry_multiplier: %.1f # default: 0.0\n " , sparams.dry_multiplier );
2012- fprintf (stream, " dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n " , sparams.dry_penalty_last_n );
2013- fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
2014- fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
2015- fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.penalty_freq );
2016- yaml_dump_string_multiline (stream, " grammar" , sparams.grammar .c_str ());
2017- fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
2018- fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
2019- fprintf (stream, " hellaswag_tasks: %zu # default: 400\n " , params.hellaswag_tasks );
2020- fprintf (stream, " ignore_eos: %s # default: false\n " , sparams.ignore_eos ? " true" : " false" );
2021-
2022- yaml_dump_string_multiline (stream, " in_prefix" , params.input_prefix .c_str ());
2023- fprintf (stream, " in_prefix_bos: %s # default: false\n " , params.input_prefix_bos ? " true" : " false" );
2024- yaml_dump_string_multiline (stream, " in_suffix" , params.input_prefix .c_str ());
2025- fprintf (stream, " interactive: %s # default: false\n " , params.interactive ? " true" : " false" );
2026- fprintf (stream, " interactive_first: %s # default: false\n " , params.interactive_first ? " true" : " false" );
2027- fprintf (stream, " keep: %d # default: 0\n " , params.n_keep );
2028- fprintf (stream, " logdir: %s # default: unset (no logging)\n " , params.logdir .c_str ());
2029-
2030- fprintf (stream, " logit_bias:\n " );
2031- for (const auto & logit_bias : sparams.logit_bias ) {
2032- fprintf (stream, " %d: %f" , logit_bias.token , logit_bias.bias );
2033- }
2034-
2035- fprintf (stream, " lora:\n " );
2036- for (auto & la : params.lora_adapters ) {
2037- if (la.scale == 1 .0f ) {
2038- fprintf (stream, " - %s\n " , la.path .c_str ());
2039- }
2040- }
2041- fprintf (stream, " lora_scaled:\n " );
2042- for (auto & la : params.lora_adapters ) {
2043- if (la.scale != 1 .0f ) {
2044- fprintf (stream, " - %s: %f\n " , la.path .c_str (), la.scale );
2045- }
2046- }
2047- fprintf (stream, " lora_init_without_apply: %s # default: false\n " , params.lora_init_without_apply ? " true" : " false" );
2048- fprintf (stream, " main_gpu: %d # default: 0\n " , params.main_gpu );
2049- fprintf (stream, " min_keep: %d # default: 0 (disabled)\n " , sparams.min_keep );
2050- fprintf (stream, " mirostat: %d # default: 0 (disabled)\n " , sparams.mirostat );
2051- fprintf (stream, " mirostat_ent: %f # default: 5.0\n " , sparams.mirostat_tau );
2052- fprintf (stream, " mirostat_lr: %f # default: 0.1\n " , sparams.mirostat_eta );
2053- fprintf (stream, " mlock: %s # default: false\n " , params.use_mlock ? " true" : " false" );
2054- fprintf (stream, " model: %s # default: %s\n " , params.model .c_str (), DEFAULT_MODEL_PATH);
2055- fprintf (stream, " model_draft: %s # default:\n " , params.model_draft .c_str ());
2056- fprintf (stream, " multiline_input: %s # default: false\n " , params.multiline_input ? " true" : " false" );
2057- fprintf (stream, " n_gpu_layers: %d # default: -1\n " , params.n_gpu_layers );
2058- fprintf (stream, " n_predict: %d # default: -1 (unlimited)\n " , params.n_predict );
2059- fprintf (stream, " n_probs: %d # only used by server binary, default: 0\n " , sparams.n_probs );
2060- fprintf (stream, " no_mmap: %s # default: false\n " , !params.use_mmap ? " true" : " false" );
2061- fprintf (stream, " penalize_nl: %s # default: false\n " , sparams.penalize_nl ? " true" : " false" );
2062- fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
2063- fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
2064- fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
2065- yaml_dump_string_multiline (stream, " prompt" , params.prompt .c_str ());
2066- fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
2067- fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
2068- fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
2069- yaml_dump_vector_int (stream, " prompt_tokens" , prompt_tokens);
2070- fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
2071-
2072- fprintf (stream, " reverse_prompt:\n " );
2073- for (std::string ap : params.antiprompt ) {
2074- size_t pos = 0 ;
2075- while ((pos = ap.find (' \n ' , pos)) != std::string::npos) {
2076- ap.replace (pos, 1 , " \\ n" );
2077- pos += 1 ;
2078- }
2079-
2080- fprintf (stream, " - %s\n " , ap.c_str ());
2081- }
2082-
2083- fprintf (stream, " rope_freq_base: %f # default: 10000.0\n " , params.rope_freq_base );
2084- fprintf (stream, " rope_freq_scale: %f # default: 1.0\n " , params.rope_freq_scale );
2085- fprintf (stream, " simple_io: %s # default: false\n " , params.simple_io ? " true" : " false" );
2086- fprintf (stream, " cont_batching: %s # default: false\n " , params.cont_batching ? " true" : " false" );
2087- fprintf (stream, " flash_attn: %s # default: false\n " , params.flash_attn ? " true" : " false" );
2088- fprintf (stream, " temp: %f # default: 0.8\n " , sparams.temp );
2089-
2090- const std::vector<float > tensor_split_vector (params.tensor_split , params.tensor_split + llama_max_devices ());
2091- yaml_dump_vector_float (stream, " tensor_split" , tensor_split_vector);
2092-
2093- fprintf (stream, " threads: %d # default: %u\n " , params.cpuparams .n_threads , std::thread::hardware_concurrency ());
2094- fprintf (stream, " top_k: %d # default: 40\n " , sparams.top_k );
2095- fprintf (stream, " top_p: %f # default: 0.95\n " , sparams.top_p );
2096- fprintf (stream, " min_p: %f # default: 0.0\n " , sparams.min_p );
2097- fprintf (stream, " xtc_probability: %f # default: 0.0\n " , sparams.xtc_probability );
2098- fprintf (stream, " xtc_threshold: %f # default: 0.1\n " , sparams.xtc_threshold );
2099- fprintf (stream, " typ_p: %f # default: 1.0\n " , sparams.typ_p );
2100- fprintf (stream, " verbose_prompt: %s # default: false\n " , params.verbose_prompt ? " true" : " false" );
2101- fprintf (stream, " display_prompt: %s # default: true\n " , params.display_prompt ? " true" : " false" );
2102- }
0 commit comments