6 changes: 6 additions & 0 deletions common/arg.cpp
@@ -1393,6 +1393,11 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
}

// Both cannot be specified at the same time
if (!params.model.hf_repo.empty() && !params.model.docker_repo.empty()) {
throw std::invalid_argument("error: cannot specify both -hf and -dr options\n");
}

// handle model and download
{
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
@@ -1727,6 +1732,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params &) {
fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
fprintf(stderr, "model cache path: %s\n", fs_get_cache_directory().c_str());
exit(0);
}
));
1 change: 1 addition & 0 deletions tools/CMakeLists.txt
@@ -18,6 +18,7 @@ else()
add_subdirectory(gguf-split)
add_subdirectory(imatrix)
add_subdirectory(llama-bench)
add_subdirectory(pull)
add_subdirectory(main)
add_subdirectory(perplexity)
add_subdirectory(quantize)
8 changes: 8 additions & 0 deletions tools/pull/CMakeLists.txt
@@ -0,0 +1,8 @@
set(TARGET llama-pull)
add_executable(${TARGET} pull.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
install(TARGETS ${TARGET} RUNTIME)
endif()
43 changes: 43 additions & 0 deletions tools/pull/README.md
@@ -0,0 +1,43 @@
# llama-pull - Model Download Tool

A command-line tool for downloading AI models from HuggingFace and [Docker Hub](https://hub.docker.com/u/ai) for use with llama.cpp.

## Usage

```bash
# Download from HuggingFace
llama-pull -hf <user>/<model>[:<quant>]

# Download from Docker Hub
llama-pull -dr [<repo>/]<model>[:<quant>]
```

## Options

- `-hf, --hf-repo REPO` - Download model from HuggingFace repository
- `-dr, --docker-repo REPO` - Download model from Docker Hub
- `--hf-token TOKEN` - HuggingFace token for private repositories
- `-h, --help` - Show help message

## Examples

```bash
# Download a HuggingFace model
llama-pull -hf microsoft/DialoGPT-medium

# Download a Docker model (ai/ repo is default)
llama-pull -dr gemma3

# Download with specific quantization
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
```
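
For gated or private Hugging Face repositories, an access token can be supplied with `--hf-token`. A minimal sketch; the repository name and token below are placeholders, not real values:

```bash
# Download from a private or gated HuggingFace repo (placeholder names)
llama-pull -hf <user>/<private-model-GGUF> --hf-token hf_xxxxxxxxxxxxxxxx
```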

## Model Storage

Downloaded models are stored in the standard llama.cpp cache directory:
- Linux: `~/.cache/llama.cpp/`
- macOS: `~/Library/Caches/llama.cpp/`
- The models can then be used with other llama.cpp tools (see the example below)
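
A model pulled once can be referenced by the same `-hf` spec in other tools, which resolve it from the cache rather than downloading it again. A minimal sketch, assuming `llama-cli` from the same build:

```bash
# Pull the model into the cache, then run it with llama-cli
llama-pull -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M
llama-cli  -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M -p "Hello"
```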

## Requirements

- llama.cpp must be built with `LLAMA_USE_CURL=ON` (the default) for download support
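
A typical build producing `llama-pull` might look like this (a sketch, assuming a standard CMake build from the repository root; `LLAMA_USE_CURL` is ON by default):

```bash
# Configure the build and compile only the llama-pull target
cmake -B build -DLLAMA_USE_CURL=ON
cmake --build build --target llama-pull
```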
65 changes: 65 additions & 0 deletions tools/pull/pull.cpp
@@ -0,0 +1,65 @@
#include "arg.h"
#include "common.h"
#include "log.h"

#include <cstdio>
#include <string>

static void print_usage(int, char ** argv) {
LOG("Usage: %s [options]\n", argv[0]);
LOG("\n");
LOG("Download models from HuggingFace or Docker Hub\n");
LOG("\n");
LOG("Options:\n");
LOG(" -h, --help show this help message and exit\n");
LOG(" -hf, -hfr, --hf-repo REPO download model from HuggingFace repo\n");
LOG(" format: <user>/<model>[:<quant>]\n");
LOG(" example: microsoft/DialoGPT-medium\n");
LOG(" -dr, --docker-repo REPO download model from Docker Hub\n");
LOG(" format: [<repo>/]<model>[:<quant>]\n");
LOG(" example: gemma3\n");
LOG(" --hf-token TOKEN HuggingFace token for private repos\n");
LOG("\n");
LOG("Examples:\n");
LOG(" %s -hf microsoft/DialoGPT-medium\n", argv[0]);
LOG(" %s -dr gemma3\n", argv[0]);
LOG(" %s -hf microsoft/DialoGPT-medium\n", argv[0]);
LOG("\n");
}

int main(int argc, char ** argv) {
common_params params;

// Parse command line arguments
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) {
print_usage(argc, argv);
return 1;
}

// Require a download source: either -hf or -dr must be provided
if (params.model.hf_repo.empty() && params.model.docker_repo.empty()) {
LOG_ERR("error: must specify either -hf <repo> or -dr <repo>\n");
print_usage(argc, argv);
return 1;
}

LOG_INF("llama-pull: downloading model...\n");
try {
// Use the existing model handling logic which downloads the model
common_init_result llama_init = common_init_from_params(params);
if (llama_init.model != nullptr) {
LOG_INF("Model downloaded and loaded successfully to: %s\n", params.model.path.c_str());

// We only want to download, not keep the model loaded
// The download happens during common_init_from_params
} else {
LOG_ERR("Failed to download or load model\n");
return 1;
}
} catch (const std::exception & e) {
LOG_ERR("Error: %s\n", e.what());
return 1;
}

return 0;
}