diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..0df65ebaed863 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,6 +86,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+option(LLAMA_YAML_CPP "llama: use yaml-cpp for YAML config file support" ON)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
@@ -190,6 +191,30 @@ if (NOT LLAMA_BUILD_COMMON)
     set(LLAMA_CURL OFF)
 endif()
 
+# Find yaml-cpp if enabled
+if (LLAMA_YAML_CPP)
+    find_package(PkgConfig QUIET)
+    if (PkgConfig_FOUND)
+        pkg_check_modules(YAML_CPP QUIET yaml-cpp)
+    endif()
+
+    if (NOT YAML_CPP_FOUND)
+        find_package(yaml-cpp QUIET)
+        if (yaml-cpp_FOUND)
+            set(YAML_CPP_LIBRARIES yaml-cpp)
+            set(YAML_CPP_INCLUDE_DIRS ${yaml-cpp_INCLUDE_DIRS})
+        endif()
+    endif()
+
+    if (NOT YAML_CPP_FOUND AND NOT yaml-cpp_FOUND)
+        message(STATUS "yaml-cpp not found, disabling YAML config support")
+        set(LLAMA_YAML_CPP OFF)
+    else()
+        message(STATUS "yaml-cpp found, enabling YAML config support")
+        add_compile_definitions(LLAMA_YAML_CPP)
+    endif()
+endif()
+
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
 endif()
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ae4d698f080c..299e0508d2259 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -137,6 +137,15 @@ target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
 
+if (LLAMA_YAML_CPP AND YAML_CPP_FOUND)
+    target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
+    target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
+elseif (LLAMA_YAML_CPP AND yaml-cpp_FOUND)
+    target_link_libraries(${TARGET} PRIVATE yaml-cpp)
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
+endif()
+
 
 #
 # copy the license files
diff --git a/common/arg.cpp b/common/arg.cpp
index fcee0c4470077..f8e3d95a1c5b8 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -19,6 +19,10 @@
 #define JSON_ASSERT GGML_ASSERT
 #include <nlohmann/json.hpp>
 
+#ifdef LLAMA_YAML_CPP
+#include <yaml-cpp/yaml.h>
+#endif
+
 #include <algorithm>
 #include <climits>
 #include <cstdarg>
@@ -65,6 +69,177 @@ static void write_file(const std::string & fname, const std::string & content) {
     file.close();
 }
 
+#ifdef LLAMA_YAML_CPP
+static bool common_params_load_from_yaml(const std::string & config_file, common_params & params) {
+    if (config_file.empty()) {
+        return true;
+    }
+
+    try {
+        YAML::Node config = YAML::LoadFile(config_file);
+        // Model parameters
+        if (config["model"]) {
+            params.model.path = config["model"].as<std::string>();
+        }
+        if (config["model_url"]) {
+            params.model.url = config["model_url"].as<std::string>();
+        }
+        if (config["model_alias"]) {
+            params.model_alias = config["model_alias"].as<std::string>();
+        }
+        if (config["hf_repo"]) {
+            params.model.hf_repo = config["hf_repo"].as<std::string>();
+        }
+        if (config["hf_file"]) {
+            params.model.hf_file = config["hf_file"].as<std::string>();
+        }
+        if (config["hf_token"]) {
+            params.hf_token = config["hf_token"].as<std::string>();
+        }
+
+        // Context and prediction parameters
+        if (config["ctx_size"]) {
+            params.n_ctx = config["ctx_size"].as<int32_t>();
+        }
+        if (config["predict"]) {
+            params.n_predict = config["predict"].as<int32_t>();
+        }
+        if (config["batch_size"]) {
+            params.n_batch = config["batch_size"].as<int32_t>();
+        }
+        if (config["ubatch_size"]) {
+            params.n_ubatch = config["ubatch_size"].as<int32_t>();
+        }
+        if (config["keep"]) {
+            params.n_keep = config["keep"].as<int32_t>();
+        }
+        if (config["chunks"]) {
+            params.n_chunks = config["chunks"].as<int32_t>();
+        }
+        if (config["parallel"]) {
+            params.n_parallel = config["parallel"].as<int32_t>();
+        }
+        if (config["sequences"]) {
+            params.n_sequences = config["sequences"].as<int32_t>();
+        }
+
+        // CPU parameters
+        if (config["threads"]) {
+            params.cpuparams.n_threads = config["threads"].as<int32_t>();
+        }
+        if (config["threads_batch"]) {
+            params.cpuparams_batch.n_threads = config["threads_batch"].as<int32_t>();
+        }
+
+        // GPU parameters
+        if (config["n_gpu_layers"]) {
+            params.n_gpu_layers = config["n_gpu_layers"].as<int32_t>();
+        }
+        if (config["main_gpu"]) {
+            params.main_gpu = config["main_gpu"].as<int32_t>();
+        }
+
+        // Sampling parameters
+        if (config["seed"]) {
+            int32_t seed_val = config["seed"].as<int32_t>();
+            params.sampling.seed = (seed_val == -1) ? LLAMA_DEFAULT_SEED : static_cast<uint32_t>(seed_val);
+        }
+        if (config["temperature"]) {
+            params.sampling.temp = config["temperature"].as<float>();
+        }
+        if (config["top_k"]) {
+            params.sampling.top_k = config["top_k"].as<int32_t>();
+        }
+        if (config["top_p"]) {
+            params.sampling.top_p = config["top_p"].as<float>();
+        }
+        if (config["min_p"]) {
+            params.sampling.min_p = config["min_p"].as<float>();
+        }
+        if (config["typical_p"]) {
+            params.sampling.typ_p = config["typical_p"].as<float>();
+        }
+        if (config["repeat_last_n"]) {
+            params.sampling.penalty_last_n = config["repeat_last_n"].as<int32_t>();
+        }
+        if (config["repeat_penalty"]) {
+            params.sampling.penalty_repeat = config["repeat_penalty"].as<float>();
+        }
+        if (config["frequency_penalty"]) {
+            params.sampling.penalty_freq = config["frequency_penalty"].as<float>();
+        }
+        if (config["presence_penalty"]) {
+            params.sampling.penalty_present = config["presence_penalty"].as<float>();
+        }
+        if (config["mirostat"]) {
+            params.sampling.mirostat = config["mirostat"].as<int32_t>();
+        }
+        if (config["mirostat_tau"]) {
+            params.sampling.mirostat_tau = config["mirostat_tau"].as<float>();
+        }
+        if (config["mirostat_eta"]) {
+            params.sampling.mirostat_eta = config["mirostat_eta"].as<float>();
+        }
+
+        // Prompt and system parameters
+        if (config["prompt"]) {
+            params.prompt = config["prompt"].as<std::string>();
+        }
+        if (config["system_prompt"]) {
+            params.system_prompt = config["system_prompt"].as<std::string>();
+        }
+        if (config["prompt_file"]) {
+            params.prompt_file = config["prompt_file"].as<std::string>();
+        }
+        if (config["prompt_cache"]) {
+            params.path_prompt_cache = config["prompt_cache"].as<std::string>();
+        }
+
+        // Input/Output parameters
+        if (config["input_prefix"]) {
+            params.input_prefix = config["input_prefix"].as<std::string>();
+        }
+        if (config["input_suffix"]) {
+            params.input_suffix = config["input_suffix"].as<std::string>();
+        }
+
+        if (config["verbose"]) {
+            params.verbosity = config["verbose"].as<int32_t>();
+        }
+
+        if (config["conversation"]) {
+            bool conv = config["conversation"].as<bool>();
+            params.conversation_mode = conv ? COMMON_CONVERSATION_MODE_ENABLED : COMMON_CONVERSATION_MODE_DISABLED;
+        }
+
+        if (config["interactive"]) {
+            params.interactive = config["interactive"].as<bool>();
+        }
+        if (config["interactive_first"]) {
+            params.interactive_first = config["interactive_first"].as<bool>();
+        }
+
+        if (config["antiprompt"]) {
+            if (config["antiprompt"].IsSequence()) {
+                for (const auto & item : config["antiprompt"]) {
+                    params.antiprompt.push_back(item.as<std::string>());
+                }
+            } else {
+                params.antiprompt.push_back(config["antiprompt"].as<std::string>());
+            }
+        }
+
+        return true;
+    } catch (const YAML::Exception & e) {
+        fprintf(stderr, "Error parsing YAML config file '%s': %s\n", config_file.c_str(), e.what());
+        return false;
+    } catch (const std::exception & e) {
+        fprintf(stderr, "Error loading YAML config file '%s': %s\n", config_file.c_str(), e.what());
+        return false;
+    }
+}
+#endif
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
@@ -1301,6 +1476,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.usage = true;
         }
     ));
+
+#ifdef LLAMA_YAML_CPP
+    add_opt(common_arg(
+        {"--config"},
+        "CONFIG_FILE",
+        "path to YAML configuration file",
+        [](common_params & params, const std::string & value) {
+            params.config_file = value;
+            if (!common_params_load_from_yaml(value, params)) {
+                throw std::invalid_argument("failed to load YAML config file: " + value);
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
+#endif
+
     add_opt(common_arg(
         {"--version"},
         "show version and build info",
diff --git a/common/common.h b/common/common.h
index 85b3b879d4536..442a4515f288d 100644
--- a/common/common.h
+++ b/common/common.h
@@ -332,6 +332,7 @@ struct common_params {
     std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding  // NOLINT
     std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
     std::string logits_file          = ""; // file for saving *all* logits                          // NOLINT
+    std::string config_file          = ""; // path to YAML configuration file                       // NOLINT
 
     std::vector<std::string> in_files;   // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
diff --git a/examples/config.yaml b/examples/config.yaml
new file mode 100644
index 0000000000000..90b886e8a295d
--- /dev/null
+++ b/examples/config.yaml
@@ -0,0 +1,43 @@
+
+model: "models/7B/ggml-model-f16.gguf"
+
+ctx_size: 2048          # Context size (number of tokens)
+predict: 128            # Number of tokens to predict (-1 for unlimited)
+batch_size: 512         # Batch size for prompt processing
+ubatch_size: 512        # Physical batch size
+keep: 0                 # Number of tokens to keep from initial prompt
+chunks: -1              # Max number of chunks to process (-1 = unlimited)
+parallel: 1             # Number of parallel sequences
+sequences: 1            # Number of sequences to decode
+
+threads: 4              # Number of threads to use
+threads_batch: 4        # Number of threads for batch processing
+
+n_gpu_layers: -1        # Number of layers to offload to GPU (-1 = all)
+main_gpu: 0             # Main GPU to use
+
+seed: -1                # Random seed (-1 for random)
+temperature: 0.8        # Sampling temperature
+top_k: 40               # Top-k sampling
+top_p: 0.95             # Top-p (nucleus) sampling
+min_p: 0.05             # Min-p sampling
+typical_p: 1.0          # Typical-p sampling
+repeat_last_n: 64       # Last n tokens to consider for repetition penalty
+repeat_penalty: 1.1     # Repetition penalty
+frequency_penalty: 0.0  # Frequency penalty
+presence_penalty: 0.0   # Presence penalty
+mirostat: 0             # Mirostat sampling (0=disabled, 1=v1, 2=v2)
+mirostat_tau: 5.0       # Mirostat target entropy
+mirostat_eta: 0.1       # Mirostat learning rate
+
+
+
+verbose: 0              # Verbosity level (0=quiet, 1=normal, 2=verbose)
+conversation: false     # Enable conversation mode
+interactive: false      # Enable interactive mode
+interactive_first: false # Start in interactive mode
+
+antiprompt:
+  - "User:"
+  - "Human:"
+  - "\n\n"
diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp
index e2836ca4814b4..7ee4072344ee1 100644
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@@ -5,6 +5,7 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <fstream>
 
 #undef NDEBUG
 #include <cassert>
@@ -174,5 +175,64 @@ int main(void) {
         printf("test-arg-parser: no curl, skipping curl-related functions\n");
     }
 
+    printf("test-arg-parser: all tests OK\n\n");
+
+#ifdef LLAMA_YAML_CPP
+    printf("test-arg-parser: testing YAML config functionality\n\n");
+    std::string yaml_content = R"(
+model: "test_model.gguf"
+threads: 8
+ctx_size: 4096
+predict: 256
+temperature: 0.7
+top_k: 50
+top_p: 0.9
+seed: 12345
+verbose: 1
+conversation: true
+antiprompt:
+  - "User:"
+  - "Stop"
+)";
+    std::string temp_config = "/tmp/test_config.yaml";
+    std::ofstream config_file(temp_config);
+    config_file << yaml_content;
+    config_file.close();
+    argv = {"binary_name", "--config", temp_config.c_str()};
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(params.model.path == "test_model.gguf");
+    assert(params.cpuparams.n_threads == 8);
+    assert(params.n_ctx == 4096);
+    assert(params.n_predict == 256);
+    assert(params.sampling.temp == 0.7f);
+    assert(params.sampling.top_k == 50);
+    assert(params.sampling.top_p == 0.9f);
+    assert(params.sampling.seed == 12345);
+    assert(params.verbosity == 1);
+    assert(params.conversation_mode == COMMON_CONVERSATION_MODE_ENABLED);
+    assert(params.antiprompt.size() == 2);
+    assert(params.antiprompt[0] == "User:");
+    assert(params.antiprompt[1] == "Stop");
+    argv = {"binary_name", "--config", temp_config.c_str(), "-t", "16", "--ctx-size", "8192"};
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(params.model.path == "test_model.gguf"); // from config
+    assert(params.cpuparams.n_threads == 16);       // overridden by CLI
+    assert(params.n_ctx == 8192);                   // overridden by CLI
+    assert(params.sampling.temp == 0.7f);           // from config
+    std::string invalid_yaml = "/tmp/invalid_config.yaml";
+    std::ofstream invalid_file(invalid_yaml);
+    invalid_file << "invalid: yaml: content: [unclosed";
+    invalid_file.close();
+    argv = {"binary_name", "--config", invalid_yaml.c_str()};
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    argv = {"binary_name", "--config", "/tmp/nonexistent_config.yaml"};
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    std::remove(temp_config.c_str());
+    std::remove(invalid_yaml.c_str());
+    printf("test-arg-parser: YAML config tests passed\n\n");
+#else
+    printf("test-arg-parser: YAML config support not compiled, skipping YAML tests\n\n");
+#endif
+
     printf("test-arg-parser: all tests OK\n\n");
 }
diff --git a/tools/main/config-example.yaml b/tools/main/config-example.yaml
new file mode 100644
index 0000000000000..921366523e736
--- /dev/null
+++ b/tools/main/config-example.yaml
@@ -0,0 +1,23 @@
+
+model: "models/llama-2-7b-chat.gguf"
+
+ctx_size: 4096
+predict: 512
+batch_size: 512
+
+threads: 8
+
+temperature: 0.7
+top_k: 40
+top_p: 0.9
+repeat_penalty: 1.1
+seed: -1
+
+conversation: true
+system_prompt: "You are a helpful, harmless, and honest AI assistant."
+
+interactive: true
+
+antiprompt:
+  - "User:"
+  - "Human:"
diff --git a/tools/server/config-example.yaml b/tools/server/config-example.yaml
new file mode 100644
index 0000000000000..20bf4605271ac
--- /dev/null
+++ b/tools/server/config-example.yaml
@@ -0,0 +1,19 @@
+
+model: "models/llama-2-7b-chat.gguf"
+
+host: "127.0.0.1"
+port: 8080
+
+ctx_size: 4096
+batch_size: 512
+ubatch_size: 512
+
+threads: 8
+n_gpu_layers: -1
+
+temperature: 0.8
+top_k: 40
+top_p: 0.95
+repeat_penalty: 1.1
+
+verbose: 1