25 changes: 25 additions & 0 deletions CMakeLists.txt
@@ -86,6 +86,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
option(LLAMA_YAML_CPP "llama: use yaml-cpp for YAML config file support" ON)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
@@ -190,6 +191,30 @@ if (NOT LLAMA_BUILD_COMMON)
set(LLAMA_CURL OFF)
endif()

# Find yaml-cpp if enabled
if (LLAMA_YAML_CPP)
find_package(PkgConfig QUIET)
if (PkgConfig_FOUND)
pkg_check_modules(YAML_CPP QUIET yaml-cpp)
endif()

if (NOT YAML_CPP_FOUND)
find_package(yaml-cpp QUIET)
if (yaml-cpp_FOUND)
set(YAML_CPP_LIBRARIES yaml-cpp)
set(YAML_CPP_INCLUDE_DIRS ${yaml-cpp_INCLUDE_DIRS})
endif()
endif()

if (NOT YAML_CPP_FOUND AND NOT yaml-cpp_FOUND)
message(STATUS "yaml-cpp not found, disabling YAML config support")
set(LLAMA_YAML_CPP OFF)
else()
message(STATUS "yaml-cpp found, enabling YAML config support")
add_compile_definitions(LLAMA_YAML_CPP)
endif()
endif()

if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
endif()
9 changes: 9 additions & 0 deletions common/CMakeLists.txt
@@ -137,6 +137,15 @@ target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

if (LLAMA_YAML_CPP AND YAML_CPP_FOUND)
target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
elseif (LLAMA_YAML_CPP AND yaml-cpp_FOUND)
target_link_libraries(${TARGET} PRIVATE yaml-cpp)
target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
endif()


#
# copy the license files
190 changes: 190 additions & 0 deletions common/arg.cpp
@@ -19,6 +19,10 @@
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>

#ifdef LLAMA_YAML_CPP
#include <yaml-cpp/yaml.h>
#endif

#include <algorithm>
#include <climits>
#include <cstdarg>
@@ -65,6 +69,177 @@ static void write_file(const std::string & fname, const std::string & content) {
file.close();
}

#ifdef LLAMA_YAML_CPP
static bool common_params_load_from_yaml(const std::string & config_file, common_params & params) {
if (config_file.empty()) {
return true;
}

try {
YAML::Node config = YAML::LoadFile(config_file);
// Model parameters
if (config["model"]) {
params.model.path = config["model"].as<std::string>();
}
if (config["model_url"]) {
params.model.url = config["model_url"].as<std::string>();
}
if (config["model_alias"]) {
params.model_alias = config["model_alias"].as<std::string>();
}
if (config["hf_repo"]) {
params.model.hf_repo = config["hf_repo"].as<std::string>();
}
if (config["hf_file"]) {
params.model.hf_file = config["hf_file"].as<std::string>();
}
if (config["hf_token"]) {
params.hf_token = config["hf_token"].as<std::string>();
}

// Context and prediction parameters
if (config["ctx_size"]) {
params.n_ctx = config["ctx_size"].as<int32_t>();
}
if (config["predict"]) {
params.n_predict = config["predict"].as<int32_t>();
}
if (config["batch_size"]) {
params.n_batch = config["batch_size"].as<int32_t>();
}
if (config["ubatch_size"]) {
params.n_ubatch = config["ubatch_size"].as<int32_t>();
}
if (config["keep"]) {
params.n_keep = config["keep"].as<int32_t>();
}
if (config["chunks"]) {
params.n_chunks = config["chunks"].as<int32_t>();
}
if (config["parallel"]) {
params.n_parallel = config["parallel"].as<int32_t>();
}
if (config["sequences"]) {
params.n_sequences = config["sequences"].as<int32_t>();
}

// CPU parameters
if (config["threads"]) {
params.cpuparams.n_threads = config["threads"].as<int>();
}
if (config["threads_batch"]) {
params.cpuparams_batch.n_threads = config["threads_batch"].as<int>();
}

// GPU parameters
if (config["n_gpu_layers"]) {
params.n_gpu_layers = config["n_gpu_layers"].as<int32_t>();
}
if (config["main_gpu"]) {
params.main_gpu = config["main_gpu"].as<int32_t>();
}

// Sampling parameters
if (config["seed"]) {
int32_t seed_val = config["seed"].as<int32_t>();
params.sampling.seed = (seed_val == -1) ? LLAMA_DEFAULT_SEED : static_cast<uint32_t>(seed_val);
}
if (config["temperature"]) {
params.sampling.temp = config["temperature"].as<float>();
}
if (config["top_k"]) {
params.sampling.top_k = config["top_k"].as<int32_t>();
}
if (config["top_p"]) {
params.sampling.top_p = config["top_p"].as<float>();
}
if (config["min_p"]) {
params.sampling.min_p = config["min_p"].as<float>();
}
if (config["typical_p"]) {
params.sampling.typ_p = config["typical_p"].as<float>();
}
if (config["repeat_last_n"]) {
params.sampling.penalty_last_n = config["repeat_last_n"].as<int32_t>();
}
if (config["repeat_penalty"]) {
params.sampling.penalty_repeat = config["repeat_penalty"].as<float>();
}
if (config["frequency_penalty"]) {
params.sampling.penalty_freq = config["frequency_penalty"].as<float>();
}
if (config["presence_penalty"]) {
params.sampling.penalty_present = config["presence_penalty"].as<float>();
}
if (config["mirostat"]) {
params.sampling.mirostat = config["mirostat"].as<int32_t>();
}
if (config["mirostat_tau"]) {
params.sampling.mirostat_tau = config["mirostat_tau"].as<float>();
}
if (config["mirostat_eta"]) {
params.sampling.mirostat_eta = config["mirostat_eta"].as<float>();
}

// Prompt and system parameters
if (config["prompt"]) {
params.prompt = config["prompt"].as<std::string>();
}
if (config["system_prompt"]) {
params.system_prompt = config["system_prompt"].as<std::string>();
}
if (config["prompt_file"]) {
params.prompt_file = config["prompt_file"].as<std::string>();
}
if (config["prompt_cache"]) {
params.path_prompt_cache = config["prompt_cache"].as<std::string>();
}

// Input/Output parameters
if (config["input_prefix"]) {
params.input_prefix = config["input_prefix"].as<std::string>();
}
if (config["input_suffix"]) {
params.input_suffix = config["input_suffix"].as<std::string>();
}

if (config["verbose"]) {
params.verbosity = config["verbose"].as<int32_t>();
}

if (config["conversation"]) {
bool conv = config["conversation"].as<bool>();
params.conversation_mode = conv ? COMMON_CONVERSATION_MODE_ENABLED : COMMON_CONVERSATION_MODE_DISABLED;
}

if (config["interactive"]) {
params.interactive = config["interactive"].as<bool>();
}
if (config["interactive_first"]) {
params.interactive_first = config["interactive_first"].as<bool>();
}

if (config["antiprompt"]) {
if (config["antiprompt"].IsSequence()) {
for (const auto & item : config["antiprompt"]) {
params.antiprompt.push_back(item.as<std::string>());
}
} else {
params.antiprompt.push_back(config["antiprompt"].as<std::string>());
}
}

return true;
} catch (const YAML::Exception & e) {
fprintf(stderr, "Error parsing YAML config file '%s': %s\n", config_file.c_str(), e.what());
return false;
} catch (const std::exception & e) {
fprintf(stderr, "Error loading YAML config file '%s': %s\n", config_file.c_str(), e.what());
return false;
}
}
#endif

common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
this->examples = std::move(examples);
return *this;
@@ -1301,6 +1476,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.usage = true;
}
));

#ifdef LLAMA_YAML_CPP
add_opt(common_arg(
{"--config"},
"CONFIG_FILE",
"path to YAML configuration file",
[](common_params & params, const std::string & value) {
params.config_file = value;
if (!common_params_load_from_yaml(value, params)) {
throw std::invalid_argument("failed to load YAML config file: " + value);
}
}
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
#endif

add_opt(common_arg(
{"--version"},
"show version and build info",
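Because the --config handler above applies the YAML values at the point where the flag is parsed, precedence follows command-line order: flags placed after --config override the file, while flags placed before it get overwritten by it. A minimal standalone sketch of that behavior (toy types and names such as toy_params, apply_config, apply_flag are hypothetical and not part of this patch):

// Toy illustration of apply-in-order precedence, not part of the patch.
#include <cstdio>
#include <string>

struct toy_params {
    int n_threads = 4;
    int n_ctx     = 512;
};

// stands in for the YAML loader: sets whatever the config file specifies
static void apply_config(toy_params & p) { p.n_threads = 8; p.n_ctx = 4096; }

// stands in for a single CLI flag handler
static void apply_flag(toy_params & p, const std::string & k, int v) {
    if (k == "-t")         { p.n_threads = v; }
    if (k == "--ctx-size") { p.n_ctx     = v; }
}

int main() {
    toy_params p;
    // equivalent of: binary --config cfg.yaml -t 16
    apply_config(p);         // threads = 8, ctx = 4096 (from the file)
    apply_flag(p, "-t", 16); // threads = 16 (the later CLI flag wins)
    printf("threads=%d ctx=%d\n", p.n_threads, p.n_ctx); // prints: threads=16 ctx=4096
    return 0;
}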
1 change: 1 addition & 0 deletions common/common.h
@@ -332,6 +332,7 @@ struct common_params {
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
std::string logits_file = ""; // file for saving *all* logits // NOLINT
std::string config_file = ""; // path to YAML configuration file // NOLINT

std::vector<std::string> in_files; // all input files
std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
43 changes: 43 additions & 0 deletions examples/config.yaml
@@ -0,0 +1,43 @@

model: "models/7B/ggml-model-f16.gguf"

ctx_size: 2048 # Context size (number of tokens)
predict: 128 # Number of tokens to predict (-1 for unlimited)
batch_size: 512 # Batch size for prompt processing
ubatch_size: 512 # Physical batch size
keep: 0 # Number of tokens to keep from initial prompt
chunks: -1 # Max number of chunks to process (-1 = unlimited)
parallel: 1 # Number of parallel sequences
sequences: 1 # Number of sequences to decode

threads: 4 # Number of threads to use
threads_batch: 4 # Number of threads for batch processing

n_gpu_layers: -1 # Number of layers to offload to GPU (-1 = all)
main_gpu: 0 # Main GPU to use

seed: -1 # Random seed (-1 for random)
temperature: 0.8 # Sampling temperature
top_k: 40 # Top-k sampling
top_p: 0.95 # Top-p (nucleus) sampling
min_p: 0.05 # Min-p sampling
typical_p: 1.0 # Typical-p sampling
repeat_last_n: 64 # Last n tokens to consider for repetition penalty
repeat_penalty: 1.1 # Repetition penalty
frequency_penalty: 0.0 # Frequency penalty
presence_penalty: 0.0 # Presence penalty
mirostat: 0 # Mirostat sampling (0=disabled, 1=v1, 2=v2)
mirostat_tau: 5.0 # Mirostat target entropy
mirostat_eta: 0.1 # Mirostat learning rate



verbose: 0 # Verbosity level (0=quiet, 1=normal, 2=verbose)
conversation: false # Enable conversation mode
interactive: false # Enable interactive mode
interactive_first: false # Start in interactive mode

antiprompt:
- "User:"
- "Human:"
- "\n\n"
60 changes: 60 additions & 0 deletions tests/test-arg-parser.cpp
@@ -5,6 +5,7 @@
#include <vector>
#include <sstream>
#include <unordered_set>
#include <fstream>

#undef NDEBUG
#include <cassert>
@@ -174,5 +175,64 @@ int main(void) {
printf("test-arg-parser: no curl, skipping curl-related functions\n");
}

printf("test-arg-parser: all tests OK\n\n");

#ifdef LLAMA_YAML_CPP
printf("test-arg-parser: testing YAML config functionality\n\n");
std::string yaml_content = R"(
model: "test_model.gguf"
threads: 8
ctx_size: 4096
predict: 256
temperature: 0.7
top_k: 50
top_p: 0.9
seed: 12345
verbose: 1
conversation: true
antiprompt:
- "User:"
- "Stop"
)";
std::string temp_config = "/tmp/test_config.yaml";
std::ofstream config_file(temp_config);
config_file << yaml_content;
config_file.close();
argv = {"binary_name", "--config", temp_config.c_str()};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "test_model.gguf");
assert(params.cpuparams.n_threads == 8);
assert(params.n_ctx == 4096);
assert(params.n_predict == 256);
assert(params.sampling.temp == 0.7f);
assert(params.sampling.top_k == 50);
assert(params.sampling.top_p == 0.9f);
assert(params.sampling.seed == 12345);
assert(params.verbosity == 1);
assert(params.conversation_mode == COMMON_CONVERSATION_MODE_ENABLED);
assert(params.antiprompt.size() == 2);
assert(params.antiprompt[0] == "User:");
assert(params.antiprompt[1] == "Stop");
argv = {"binary_name", "--config", temp_config.c_str(), "-t", "16", "--ctx-size", "8192"};
assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
assert(params.model.path == "test_model.gguf"); // from config
assert(params.cpuparams.n_threads == 16); // overridden by CLI
assert(params.n_ctx == 8192); // overridden by CLI
assert(params.sampling.temp == 0.7f); // from config
std::string invalid_yaml = "/tmp/invalid_config.yaml";
std::ofstream invalid_file(invalid_yaml);
invalid_file << "invalid: yaml: content: [unclosed";
invalid_file.close();
argv = {"binary_name", "--config", invalid_yaml.c_str()};
assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
argv = {"binary_name", "--config", "/tmp/nonexistent_config.yaml"};
assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
std::remove(temp_config.c_str());
std::remove(invalid_yaml.c_str());
printf("test-arg-parser: YAML config tests passed\n\n");
#else
printf("test-arg-parser: YAML config support not compiled, skipping YAML tests\n\n");
#endif

printf("test-arg-parser: all tests OK\n\n");
}