Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2338,6 +2338,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.mmproj_use_gpu = false;
}
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
// --mmproj-backend NAME
// Registers a command-line option that takes one string argument. The handler
// only copies that string into params.mmproj_backend; no validation of the
// backend name happens at parse time.
// NOTE(review): the preceding mmproj option uses set_examples(mmproj_examples)
// and also registers an environment variable via set_env(); this option lists
// its examples explicitly and registers no env var — confirm that is intended.
add_opt(common_arg(
{"--mmproj-backend"}, "NAME",
"GPU backend for multimodal projector (e.g. CUDA, Metal, Vulkan)\n"
"if not specified, will use MTMD_BACKEND_DEVICE env var or default GPU backend",
[](common_params & params, const std::string & value) {
params.mmproj_backend = value;
}
).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}));
add_opt(common_arg(
{"--image", "--audio"}, "FILE",
"path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
Expand Down
1 change: 1 addition & 0 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ struct common_params {
// multimodal models (see tools/mtmd)
struct common_params_model mmproj;
bool mmproj_use_gpu = true; // use GPU for multimodal model
std::string mmproj_backend = ""; // GPU backend for multimodal model (e.g. "CUDA", "Metal", "Vulkan")
bool no_mmproj = false; // explicitly disable multimodal model
std::vector<std::string> image; // path to image file(s)

Expand Down
8 changes: 2 additions & 6 deletions tools/mtmd/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@
#include <numeric>
#include <functional>

struct clip_logger_state g_logger_state = {
GGML_LOG_LEVEL_CONT, // verbosity_thold
clip_log_callback_default, // log_callback
NULL // log_callback_user_data
};
// Global logger state for clip. Initializer values are positional, in this order:
//   verbosity_thold, log_callback, log_callback_user_data
struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callback_default, NULL};

enum ffn_op_type {
FFN_GELU,
Expand Down Expand Up @@ -401,7 +397,7 @@ struct clip_ctx {
throw std::runtime_error("failed to initialize CPU backend");
}
if (ctx_params.use_gpu) {
auto backend_name = std::getenv("MTMD_BACKEND_DEVICE");
auto backend_name = ctx_params.backend_device ? ctx_params.backend_device : std::getenv("MTMD_BACKEND_DEVICE");
if (backend_name != nullptr) {
backend = ggml_backend_init_by_name(backend_name, nullptr);
if (!backend) {
Expand Down
1 change: 1 addition & 0 deletions tools/mtmd/clip.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum clip_modality {
// Options handed to clip_init() when creating the clip context(s).
struct clip_context_params {
bool use_gpu; // when true, clip_ctx attempts to set up a GPU backend in addition to the CPU backend
enum ggml_log_level verbosity; // ggml log level for this context
// Optional backend name (e.g. "CUDA", "Metal", "Vulkan"). When non-null it takes
// precedence over the MTMD_BACKEND_DEVICE env var and is passed to
// ggml_backend_init_by_name(); when null, the env var or the default GPU backend is used.
// Only the pointer is tested, so an empty string counts as "specified".
// NOTE(review): the string is presumably borrowed, not copied — confirm it outlives clip_init().
const char * backend_device; // optional, if null will use env var or default GPU backend
};

struct clip_init_result {
Expand Down
2 changes: 2 additions & 0 deletions tools/mtmd/mtmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ mtmd_context_params mtmd_context_params_default() {
params.verbosity = GGML_LOG_LEVEL_INFO;
params.image_marker = MTMD_DEFAULT_IMAGE_MARKER;
params.media_marker = mtmd_default_marker();
params.backend_device = nullptr;
return params;
}

Expand Down Expand Up @@ -152,6 +153,7 @@ struct mtmd_context {
clip_context_params ctx_clip_params;
ctx_clip_params.use_gpu = ctx_params.use_gpu;
ctx_clip_params.verbosity = ctx_params.verbosity;
ctx_clip_params.backend_device = ctx_params.backend_device;
auto res = clip_init(mmproj_fname, ctx_clip_params);
ctx_v = res.ctx_v;
ctx_a = res.ctx_a;
Expand Down
1 change: 1 addition & 0 deletions tools/mtmd/mtmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ struct mtmd_context_params {
enum ggml_log_level verbosity;
const char * image_marker; // deprecated, use media_marker instead
const char * media_marker;
const char * backend_device; // optional GPU backend name (e.g. "CUDA", "Metal", "Vulkan"), if null will use env var or default
};

MTMD_API const char * mtmd_default_marker(void);
Expand Down
Loading