Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .devops/cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ WORKDIR /app
COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
Expand Down
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ COPY . .
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
2 changes: 1 addition & 1 deletion .devops/intel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
echo "Building with dynamic libs" && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
2 changes: 1 addition & 1 deletion .devops/llama-cli-cann.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
Expand Down
2 changes: 1 addition & 1 deletion .devops/musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ COPY . .
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
2 changes: 1 addition & 1 deletion .devops/rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ WORKDIR /app
COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
Expand Down
2 changes: 1 addition & 1 deletion .devops/vulkan.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
Expand Down
156 changes: 101 additions & 55 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,25 @@ std::initializer_list<enum llama_example> mmproj_examples = {
// TODO: add LLAMA_EXAMPLE_SERVER when it's ready
};

static std::string read_file(const std::string & fname) {
std::ifstream file(fname);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
}
std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
file.close();
return content;
}

static void write_file(const std::string & fname, const std::string & content) {
std::ofstream file(fname);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
}
file << content;
file.close();
}

common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
this->examples = std::move(examples);
return *this;
Expand Down Expand Up @@ -200,9 +219,11 @@ struct curl_slist_ptr {

static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
int remaining_attempts = max_attempts;
char * method = nullptr;
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_METHOD, &method);

while (remaining_attempts > 0) {
LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);

CURLcode res = curl_easy_perform(curl);
if (res == CURLE_OK) {
Expand All @@ -213,6 +234,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);

remaining_attempts--;
if (remaining_attempts == 0) break;
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
}

Expand All @@ -231,8 +253,6 @@ static bool common_download_file_single(const std::string & url, const std::stri
return false;
}

bool force_download = false;

// Set the URL, allow to follow http redirection
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
Expand All @@ -256,7 +276,7 @@ static bool common_download_file_single(const std::string & url, const std::stri

// If the file exists, check its JSON metadata companion file.
std::string metadata_path = path + ".json";
nlohmann::json metadata;
nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
std::string etag;
std::string last_modified;

Expand All @@ -266,25 +286,18 @@ static bool common_download_file_single(const std::string & url, const std::stri
if (metadata_in.good()) {
try {
metadata_in >> metadata;
LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
if (metadata.contains("url") && metadata.at("url").is_string()) {
auto previous_url = metadata.at("url").get<std::string>();
if (previous_url != url) {
LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
return false;
}
}
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
etag = metadata.at("etag");
}
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
last_modified = metadata.at("lastModified");
}
} catch (const nlohmann::json::exception & e) {
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
return false;
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
}
}
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
} else {
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
}
Expand All @@ -296,7 +309,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
};

common_load_model_from_url_headers headers;
bool head_request_ok = false;
bool should_download = !file_exists; // by default, we should download if the file does not exist

// get ETag to see if the remote file has changed
{
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
Expand Down Expand Up @@ -325,23 +341,28 @@ static bool common_download_file_single(const std::string & url, const std::stri
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);

bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
// we only allow retrying once for HEAD requests
// this is for the use case of using running offline (no internet), retrying can be annoying
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0);
if (!was_perform_successful) {
return false;
head_request_ok = false;
}

long http_code = 0;
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
if (http_code != 200) {
// HEAD not supported, we don't know if the file has changed
// force trigger downloading
force_download = true;
LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
if (http_code == 200) {
head_request_ok = true;
} else {
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
head_request_ok = false;
}
}

bool should_download = !file_exists || force_download;
if (!should_download) {
// if head_request_ok is false, we don't have the etag or last-modified headers
// we leave should_download as-is, which is true if the file does not exist
if (head_request_ok) {
// check if ETag or Last-Modified headers are different
// if it is, we need to download the file again
if (!etag.empty() && etag != headers.etag) {
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
should_download = true;
Expand All @@ -350,6 +371,7 @@ static bool common_download_file_single(const std::string & url, const std::stri
should_download = true;
}
}

if (should_download) {
std::string path_temporary = path + ".downloadInProgress";
if (file_exists) {
Expand Down Expand Up @@ -424,13 +446,15 @@ static bool common_download_file_single(const std::string & url, const std::stri
{"etag", headers.etag},
{"lastModified", headers.last_modified}
});
std::ofstream(metadata_path) << metadata.dump(4);
LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
write_file(metadata_path, metadata.dump(4));
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());

if (rename(path_temporary.c_str(), path.c_str()) != 0) {
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
return false;
}
} else {
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
}

return true;
Expand Down Expand Up @@ -605,16 +629,37 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
// User-Agent header is already set in common_remote_get_content, no need to set it here

// we use "=" to avoid clashing with other component, while still being allowed on windows
std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
string_replace_all(cached_response_fname, "/", "_");
std::string cached_response_path = fs_get_cache_file(cached_response_fname);

// make the request
common_remote_params params;
params.headers = headers;
auto res = common_remote_get_content(url, params);
long res_code = res.first;
std::string res_str(res.second.data(), res.second.size());
long res_code = 0;
std::string res_str;
bool use_cache = false;
try {
auto res = common_remote_get_content(url, params);
res_code = res.first;
res_str = std::string(res.second.data(), res.second.size());
} catch (const std::exception & e) {
LOG_WRN("error: failed to get manifest: %s\n", e.what());
LOG_WRN("try reading from cache\n");
// try to read from cache
try {
res_str = read_file(cached_response_path);
res_code = 200;
use_cache = true;
} catch (const std::exception & e) {
throw std::runtime_error("error: failed to get manifest (check your internet connection)");
}
}
std::string ggufFile;
std::string mmprojFile;

if (res_code == 200) {
if (res_code == 200 || res_code == 304) {
// extract ggufFile.rfilename in json, using regex
{
std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
Expand All @@ -631,6 +676,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
mmprojFile = match[1].str();
}
}
if (!use_cache) {
// if not using cached response, update the cache file
write_file(cached_response_path, res_str);
}
} else if (res_code == 401) {
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
} else {
Expand Down Expand Up @@ -1142,6 +1191,9 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
fprintf(stderr, "%s\n", ex.what());
ctx_arg.params = params_org;
return false;
} catch (std::exception & ex) {
fprintf(stderr, "%s\n", ex.what());
exit(1); // for other exceptions, we exit with status code 1
}

return true;
Expand Down Expand Up @@ -1442,13 +1494,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"-f", "--file"}, "FNAME",
"a file containing the prompt (default: none)",
[](common_params & params, const std::string & value) {
std::ifstream file(value);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
}
params.prompt = read_file(value);
// store the external file name in params
params.prompt_file = value;
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
if (!params.prompt.empty() && params.prompt.back() == '\n') {
params.prompt.pop_back();
}
Expand All @@ -1458,11 +1506,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"-sysf", "--system-prompt-file"}, "FNAME",
"a file containing the system prompt (default: none)",
[](common_params & params, const std::string & value) {
std::ifstream file(value);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
}
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.system_prompt));
params.system_prompt = read_file(value);
if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
params.system_prompt.pop_back();
}
Expand Down Expand Up @@ -1886,23 +1930,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
add_opt(common_arg(
{"--grammar-file"}, "FNAME",
"file to read grammar from",
[](common_params & params, const std::string & value) {
params.sampling.grammar = read_file(value);
}
).set_sparam());
add_opt(common_arg(
{"-j", "--json-schema"}, "SCHEMA",
"JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
[](common_params & params, const std::string & value) {
params.sampling.grammar = json_schema_to_grammar(json::parse(value));
}
).set_sparam());
add_opt(common_arg(
{"-jf", "--json-schema-file"}, "FILE",
"File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
[](common_params & params, const std::string & value) {
std::ifstream file(value);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
}
std::string schema;
std::copy(
std::istreambuf_iterator<char>(file),
std::istreambuf_iterator<char>(),
std::back_inserter(params.sampling.grammar)
std::back_inserter(schema)
);
}
).set_sparam());
add_opt(common_arg(
{"-j", "--json-schema"}, "SCHEMA",
"JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
[](common_params & params, const std::string & value) {
params.sampling.grammar = json_schema_to_grammar(json::parse(value));
params.sampling.grammar = json_schema_to_grammar(json::parse(schema));
}
).set_sparam());
add_opt(common_arg(
Expand Down Expand Up @@ -2815,14 +2868,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
),
[](common_params & params, const std::string & value) {
std::ifstream file(value);
if (!file) {
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
}
std::copy(
std::istreambuf_iterator<char>(file),
std::istreambuf_iterator<char>(),
std::back_inserter(params.chat_template));
params.chat_template = read_file(value);
}
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
add_opt(common_arg(
Expand Down
Loading
Loading