diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..0df65ebaed863 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -86,6 +86,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+option(LLAMA_YAML_CPP "llama: use yaml-cpp for YAML config file support" ON)
 
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
@@ -190,6 +191,30 @@ if (NOT LLAMA_BUILD_COMMON)
     set(LLAMA_CURL OFF)
 endif()
 
+# Find yaml-cpp if enabled
+if (LLAMA_YAML_CPP)
+    find_package(PkgConfig QUIET)
+    if (PkgConfig_FOUND)
+        pkg_check_modules(YAML_CPP QUIET yaml-cpp)
+    endif()
+
+    if (NOT YAML_CPP_FOUND)
+        find_package(yaml-cpp QUIET)
+        if (yaml-cpp_FOUND)
+            set(YAML_CPP_LIBRARIES yaml-cpp)
+            set(YAML_CPP_INCLUDE_DIRS ${yaml-cpp_INCLUDE_DIRS})
+        endif()
+    endif()
+
+    if (NOT YAML_CPP_FOUND AND NOT yaml-cpp_FOUND)
+        message(STATUS "yaml-cpp not found, disabling YAML config support")
+        set(LLAMA_YAML_CPP OFF)
+    else()
+        message(STATUS "yaml-cpp found, enabling YAML config support")
+        add_compile_definitions(LLAMA_YAML_CPP)
+    endif()
+endif()
+
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
 endif()
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ae4d698f080c..299e0508d2259 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -137,6 +137,15 @@ target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
 
+if (LLAMA_YAML_CPP AND YAML_CPP_FOUND)
+    target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
+    target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
+elseif (LLAMA_YAML_CPP AND yaml-cpp_FOUND)
+    target_link_libraries(${TARGET} PRIVATE yaml-cpp)
+    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CPP)
+endif()
+
 
 #
 # copy the license files
diff --git a/common/arg.cpp b/common/arg.cpp
index fcee0c4470077..f8e3d95a1c5b8 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -19,6 +19,10 @@
 #define JSON_ASSERT GGML_ASSERT
 #include <nlohmann/json.hpp>
 
+#ifdef LLAMA_YAML_CPP
+#include <yaml-cpp/yaml.h>
+#endif
+
 #include <algorithm>
 #include <climits>
 #include <cstdarg>
@@ -65,6 +69,177 @@ static void write_file(const std::string & fname, const std::string & content) {
     file.close();
 }
 
+#ifdef LLAMA_YAML_CPP
+static bool common_params_load_from_yaml(const std::string & config_file, common_params & params) {
+    if (config_file.empty()) {
+        return true;
+    }
+
+    try {
+        YAML::Node config = YAML::LoadFile(config_file);
+        // Model parameters
+        if (config["model"]) {
+            params.model.path = config["model"].as<std::string>();
+        }
+        if (config["model_url"]) {
+            params.model.url = config["model_url"].as<std::string>();
+        }
+        if (config["model_alias"]) {
+            params.model_alias = config["model_alias"].as<std::string>();
+        }
+        if (config["hf_repo"]) {
+            params.model.hf_repo = config["hf_repo"].as<std::string>();
+        }
+        if (config["hf_file"]) {
+            params.model.hf_file = config["hf_file"].as<std::string>();
+        }
+        if (config["hf_token"]) {
+            params.hf_token = config["hf_token"].as<std::string>();
+        }
+
+        // Context and prediction parameters
+        if (config["ctx_size"]) {
+            params.n_ctx = config["ctx_size"].as<int32_t>();
+        }
+        if (config["predict"]) {
+            params.n_predict = config["predict"].as<int32_t>();
+        }
+        if (config["batch_size"]) {
+            params.n_batch = config["batch_size"].as<int32_t>();
+        }
+        if (config["ubatch_size"]) {
+            params.n_ubatch = config["ubatch_size"].as<int32_t>();
+        }
+        if (config["keep"]) {
+            params.n_keep = config["keep"].as<int32_t>();
+        }
+        if (config["chunks"]) {
+            params.n_chunks = config["chunks"].as<int32_t>();
+        }
+        if (config["parallel"]) {
+            params.n_parallel = config["parallel"].as<int32_t>();
+        }
+        if (config["sequences"]) {
+            params.n_sequences = config["sequences"].as<int32_t>();
+        }
+
+        // CPU parameters
+        if (config["threads"]) {
+            params.cpuparams.n_threads = config["threads"].as<int32_t>();
+        }
+        if (config["threads_batch"]) {
+            params.cpuparams_batch.n_threads = config["threads_batch"].as<int32_t>();
+        }
+
+        // GPU parameters
+        if (config["n_gpu_layers"]) {
+            params.n_gpu_layers = config["n_gpu_layers"].as<int32_t>();
+        }
+        if (config["main_gpu"]) {
+            params.main_gpu = config["main_gpu"].as<int32_t>();
+        }
+
+        // Sampling parameters
+        if (config["seed"]) {
+            int32_t seed_val = config["seed"].as<int32_t>();
+            params.sampling.seed = (seed_val == -1) ? LLAMA_DEFAULT_SEED : static_cast<uint32_t>(seed_val);
+        }
+        if (config["temperature"]) {
+            params.sampling.temp = config["temperature"].as<float>();
+        }
+        if (config["top_k"]) {
+            params.sampling.top_k = config["top_k"].as<int32_t>();
+        }
+        if (config["top_p"]) {
+            params.sampling.top_p = config["top_p"].as<float>();
+        }
+        if (config["min_p"]) {
+            params.sampling.min_p = config["min_p"].as<float>();
+        }
+        if (config["typical_p"]) {
+            params.sampling.typ_p = config["typical_p"].as<float>();
+        }
+        if (config["repeat_last_n"]) {
+            params.sampling.penalty_last_n = config["repeat_last_n"].as<int32_t>();
+        }
+        if (config["repeat_penalty"]) {
+            params.sampling.penalty_repeat = config["repeat_penalty"].as<float>();
+        }
+        if (config["frequency_penalty"]) {
+            params.sampling.penalty_freq = config["frequency_penalty"].as<float>();
+        }
+        if (config["presence_penalty"]) {
+            params.sampling.penalty_present = config["presence_penalty"].as<float>();
+        }
+        if (config["mirostat"]) {
+            params.sampling.mirostat = config["mirostat"].as<int32_t>();
+        }
+        if (config["mirostat_tau"]) {
+            params.sampling.mirostat_tau = config["mirostat_tau"].as<float>();
+        }
+        if (config["mirostat_eta"]) {
+            params.sampling.mirostat_eta = config["mirostat_eta"].as<float>();
+        }
+
+        // Prompt and system parameters
+        if (config["prompt"]) {
+            params.prompt = config["prompt"].as<std::string>();
+        }
+        if (config["system_prompt"]) {
+            params.system_prompt = config["system_prompt"].as<std::string>();
+        }
+        if (config["prompt_file"]) {
+            params.prompt_file = config["prompt_file"].as<std::string>();
+        }
+        if (config["prompt_cache"]) {
+            params.path_prompt_cache = config["prompt_cache"].as<std::string>();
+        }
+
+        // Input/Output parameters
+        if (config["input_prefix"]) {
+            params.input_prefix = config["input_prefix"].as<std::string>();
+        }
+        if (config["input_suffix"]) {
+            params.input_suffix = config["input_suffix"].as<std::string>();
+        }
+
+        if (config["verbose"]) {
+            params.verbosity = config["verbose"].as<int32_t>();
+        }
+
+        if (config["conversation"]) {
+            bool conv = config["conversation"].as<bool>();
+            params.conversation_mode = conv ? COMMON_CONVERSATION_MODE_ENABLED : COMMON_CONVERSATION_MODE_DISABLED;
+        }
+
+        if (config["interactive"]) {
+            params.interactive = config["interactive"].as<bool>();
+        }
+        if (config["interactive_first"]) {
+            params.interactive_first = config["interactive_first"].as<bool>();
+        }
+
+        if (config["antiprompt"]) {
+            if (config["antiprompt"].IsSequence()) {
+                for (const auto & item : config["antiprompt"]) {
+                    params.antiprompt.push_back(item.as<std::string>());
+                }
+            } else {
+                params.antiprompt.push_back(config["antiprompt"].as<std::string>());
+            }
+        }
+
+        return true;
+    } catch (const YAML::Exception & e) {
+        fprintf(stderr, "Error parsing YAML config file '%s': %s\n", config_file.c_str(), e.what());
+        return false;
+    } catch (const std::exception & e) {
+        fprintf(stderr, "Error loading YAML config file '%s': %s\n", config_file.c_str(), e.what());
+        return false;
+    }
+}
+#endif
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
@@ -1301,6 +1476,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.usage = true;
         }
     ));
+
+#ifdef LLAMA_YAML_CPP
+    add_opt(common_arg(
+        {"--config"},
+        "CONFIG_FILE",
+        "path to YAML configuration file",
+        [](common_params & params, const std::string & value) {
+            params.config_file = value;
+            if (!common_params_load_from_yaml(value, params)) {
+                throw std::invalid_argument("failed to load YAML config file: " + value);
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
+#endif
+
     add_opt(common_arg(
         {"--version"},
         "show version and build info",
diff --git a/common/common.h b/common/common.h
index 85b3b879d4536..442a4515f288d 100644
--- a/common/common.h
+++ b/common/common.h
@@ -332,6 +332,7 @@ struct common_params {
     std::string lookup_cache_static  = ""; // path of static ngram cache file for lookup decoding  // NOLINT
     std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
     std::string logits_file          = ""; // file for saving *all* logits                          // NOLINT
+    std::string config_file          = ""; // path to YAML configuration file                       // NOLINT
 
     std::vector<std::string> in_files;   // all input files
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
diff --git a/examples/config.yaml b/examples/config.yaml
new file mode 100644
index 0000000000000..90b886e8a295d
--- /dev/null
+++ b/examples/config.yaml
@@ -0,0 +1,43 @@
+
+model: "models/7B/ggml-model-f16.gguf"
+
+ctx_size: 2048          # Context size (number of tokens)
+predict: 128            # Number of tokens to predict (-1 for unlimited)
+batch_size: 512         # Batch size for prompt processing
+ubatch_size: 512        # Physical batch size
+keep: 0                 # Number of tokens to keep from initial prompt
+chunks: -1              # Max number of chunks to process (-1 = unlimited)
+parallel: 1             # Number of parallel sequences
+sequences: 1            # Number of sequences to decode
+
+threads: 4              # Number of threads to use
+threads_batch: 4        # Number of threads for batch processing
+
+n_gpu_layers: -1        # Number of layers to offload to GPU (-1 = all)
+main_gpu: 0             # Main GPU to use
+
+seed: -1                # Random seed (-1 for random)
+temperature: 0.8        # Sampling temperature
+top_k: 40               # Top-k sampling
+top_p: 0.95             # Top-p (nucleus) sampling
+min_p: 0.05             # Min-p sampling
+typical_p: 1.0          # Typical-p sampling
+repeat_last_n: 64       # Last n tokens to consider for repetition penalty
+repeat_penalty: 1.1     # Repetition penalty
+frequency_penalty: 0.0  # Frequency penalty
+presence_penalty: 0.0   # Presence penalty
+mirostat: 0             # Mirostat sampling (0=disabled, 1=v1, 2=v2)
+mirostat_tau: 5.0       # Mirostat target entropy
+mirostat_eta: 0.1       # Mirostat learning rate
+
+
+
+verbose: 0              # Verbosity level (0=quiet, 1=normal, 2=verbose)
+conversation: false     # Enable conversation mode
+interactive: false      # Enable interactive mode
+interactive_first: false # Start in interactive mode
+
+antiprompt:
+  - "User:"
+  - "Human:"
+  - "\n\n"
diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp
index e2836ca4814b4..7ee4072344ee1 100644
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@@ -5,6 +5,7 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <fstream>
 
 #undef NDEBUG
 #include <cassert>
@@ -174,5 +175,64 @@ int main(void) {
         printf("test-arg-parser: no curl, skipping curl-related functions\n");
     }
 
+    printf("test-arg-parser: all tests OK\n\n");
+
+#ifdef LLAMA_YAML_CPP
+    printf("test-arg-parser: testing YAML config functionality\n\n");
+    std::string yaml_content = R"(
+model: "test_model.gguf"
+threads: 8
+ctx_size: 4096
+predict: 256
+temperature: 0.7
+top_k: 50
+top_p: 0.9
+seed: 12345
+verbose: 1
+conversation: true
+antiprompt:
+  - "User:"
+  - "Stop"
+)";
+    std::string temp_config = "/tmp/test_config.yaml";
+    std::ofstream config_file(temp_config);
+    config_file << yaml_content;
+    config_file.close();
+    argv = {"binary_name", "--config", temp_config.c_str()};
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(params.model.path == "test_model.gguf");
+    assert(params.cpuparams.n_threads == 8);
+    assert(params.n_ctx == 4096);
+    assert(params.n_predict == 256);
+    assert(params.sampling.temp == 0.7f);
+    assert(params.sampling.top_k == 50);
+    assert(params.sampling.top_p == 0.9f);
+    assert(params.sampling.seed == 12345);
+    assert(params.verbosity == 1);
+    assert(params.conversation_mode == COMMON_CONVERSATION_MODE_ENABLED);
+    assert(params.antiprompt.size() == 2);
+    assert(params.antiprompt[0] == "User:");
+    assert(params.antiprompt[1] == "Stop");
+    argv = {"binary_name", "--config", temp_config.c_str(), "-t", "16", "--ctx-size", "8192"};
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(params.model.path == "test_model.gguf"); // from config
+    assert(params.cpuparams.n_threads == 16);       // overridden by CLI
+    assert(params.n_ctx == 8192);                   // overridden by CLI
+    assert(params.sampling.temp == 0.7f);           // from config
+    std::string invalid_yaml = "/tmp/invalid_config.yaml";
+    std::ofstream invalid_file(invalid_yaml);
+    invalid_file << "invalid: yaml: content: [unclosed";
+    invalid_file.close();
+    argv = {"binary_name", "--config", invalid_yaml.c_str()};
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    argv = {"binary_name", "--config", "/tmp/nonexistent_config.yaml"};
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    std::remove(temp_config.c_str());
+    std::remove(invalid_yaml.c_str());
+    printf("test-arg-parser: YAML config tests passed\n\n");
+#else
+    printf("test-arg-parser: YAML config support not compiled, skipping YAML tests\n\n");
+#endif
+
     printf("test-arg-parser: all tests OK\n\n");
 }
diff --git a/tools/main/config-example.yaml b/tools/main/config-example.yaml
new file mode 100644
index 0000000000000..921366523e736
--- /dev/null
+++ b/tools/main/config-example.yaml
@@ -0,0 +1,23 @@
+
+model: "models/llama-2-7b-chat.gguf"
+
+ctx_size: 4096
+predict: 512
+batch_size: 512
+
+threads: 8
+
+temperature: 0.7
+top_k: 40
+top_p: 0.9
+repeat_penalty: 1.1
+seed: -1
+
+conversation: true
+system_prompt: "You are a helpful, harmless, and honest AI assistant."
+
+interactive: true
+
+antiprompt:
+  - "User:"
+  - "Human:"
diff --git a/tools/server/config-example.yaml b/tools/server/config-example.yaml
new file mode 100644
index 0000000000000..20bf4605271ac
--- /dev/null
+++ b/tools/server/config-example.yaml
@@ -0,0 +1,19 @@
+
+model: "models/llama-2-7b-chat.gguf"
+
+host: "127.0.0.1"
+port: 8080
+
+ctx_size: 4096
+batch_size: 512
+ubatch_size: 512
+
+threads: 8
+n_gpu_layers: -1
+
+temperature: 0.8
+top_k: 40
+top_p: 0.95
+repeat_penalty: 1.1
+
+verbose: 1