diff --git a/llama.cpp/llava/llava-cli.cpp b/llama.cpp/llava/llava-cli.cpp
index 3f13271cd9..60389228b2 100644
--- a/llama.cpp/llava/llava-cli.cpp
+++ b/llama.cpp/llava/llava-cli.cpp
@@ -8,7 +8,9 @@
 #include "llama.cpp/llama.h"
 #include "llama.cpp/base64.h"
 #include "llamafile/version.h"
+#include "llamafile/llamafile.h"
+#include <cstring>

 #include <cstdio>
 #include <cstdlib>
 #include <vector>
@@ -63,6 +65,68 @@ static const char * sample(struct llama_sampling_context * ctx_sampling,
 static const char* IMG_BASE64_TAG_BEGIN = "<img src=\"data:image/jpeg;base64,";
 static const char* IMG_BASE64_TAG_END = "\">";

+// Auto-detect mmproj file from the main model if it's a ZIP/llamafile
+static std::string auto_detect_mmproj(const std::string& model_path) {
+    // Try common mmproj filenames
+    std::vector<std::string> common_names = {
+        "mmproj-model-f16.gguf",
+        "mmproj-model-q4_0.gguf",
+        "mmproj-model-q4_1.gguf",
+        "mmproj.gguf",
+        "vision_encoder.gguf",
+        "clip.gguf",
+        "visual.gguf"
+    };
+
+    LOG_TEE("Auto-detecting mmproj file (model_path='%s')...\n", model_path.c_str());
+
+    // First, check if model_path is a ZIP file (contains @) or a regular file
+    bool model_is_zip = false;
+    if (!model_path.empty() && model_path != DEFAULT_MODEL_PATH) {
+        // Check if it's a ZIP by looking for .gguf extension or seeing if we can open it
+        struct llamafile* test = llamafile_open_gguf(model_path.c_str(), "rb");
+        if (test) {
+            // Check if it's actually a ZIP by looking at the opened path
+            const char* opened_name = llamafile_name(test);
+            model_is_zip = opened_name && strchr(opened_name, '@') != NULL;
+            llamafile_close(test);
+        }
+
+        // If model is a ZIP/llamafile, try to find mmproj inside it
+        if (model_is_zip || model_path.find(".llamafile") != std::string::npos) {
+            std::string base_path = model_path;
+            // Remove @ suffix if present
+            size_t at_pos = base_path.find('@');
+            if (at_pos != std::string::npos) {
+                base_path = base_path.substr(0, at_pos);
+            }
+
+            for (const auto& name : common_names) {
+                std::string test_path = base_path + "@" + name;
+                struct llamafile* f = llamafile_open_gguf(test_path.c_str(), "rb");
+                if (f) {
+                    llamafile_close(f);
+                    LOG_TEE("Auto-detected mmproj file: %s\n", test_path.c_str());
+                    return test_path;
+                }
+            }
+        }
+    }
+
+    // Try in the executable itself (when running as ./llava.llamafile)
+    for (const auto& name : common_names) {
+        struct llamafile* f = llamafile_open_gguf(name.c_str(), "rb");
+        if (f) {
+            llamafile_close(f);
+            LOG_TEE("Auto-detected mmproj file: %s (embedded in executable)\n", name.c_str());
+            return name;
+        }
+    }
+
+    LOG_TEE("No mmproj file found via auto-detection\n");
+    return "";
+}
+
 static void find_image_tag_in_prompt(const std::string& prompt, size_t& begin_out, size_t& end_out) {
     begin_out = prompt.find(IMG_BASE64_TAG_BEGIN);
     end_out = prompt.find(IMG_BASE64_TAG_END, (begin_out == std::string::npos) ? 0UL : begin_out);
@@ -132,6 +196,8 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
     LOG_TEE("\n example usage:\n");
     LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
+    LOG_TEE("\n %s -m <llava.llamafile> --image <path/to/an/image.jpg> [-p \"describe the image in detail.\"]\n", argv[0]);
+    LOG_TEE("\n note: --mmproj is optional when using a llamafile containing multiple GGUF files\n");
     LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
 }
@@ -307,6 +373,23 @@ int llava_cli(int argc, char ** argv, gpt_params & params) {
     llama_log_set(llama_log_callback_logTee, nullptr);
 #endif // LOG_DISABLE_LOGS

+    // Handle running as llamafile executable
+    if (params.model.empty()) {
+        // Check if we're running as a llamafile with embedded model
+        const char* prog = GetProgramExecutableName();
+        struct llamafile* test = llamafile_open_gguf(prog, "rb");
+        if (test) {
+            llamafile_close(test);
+            params.model = prog;
+            LOG_TEE("Running as llamafile, using embedded model: %s\n", prog);
+        }
+    }
+
+    // Auto-detect mmproj if not provided
+    if (params.mmproj.empty()) {
+        params.mmproj = auto_detect_mmproj(params.model);
+    }
+
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
         print_usage(argc, argv, {});
         return 1;
diff --git a/llamafile/llamafile.c b/llamafile/llamafile.c
index 161d31155d..67a2a54e34 100644
--- a/llamafile/llamafile.c
+++ b/llamafile/llamafile.c
@@ -177,10 +177,12 @@ static struct llamafile *llamafile_open_zip(const char *prog, const char *fname,
         goto Invalid;
     }
     if (found != 1) {
-        // TODO: Support opening LLaVA llamafiles.
-        fprintf(stderr, "%s: error: multiple %s files found in zip archive\n", prog,
-                fname ? fname : ".gguf");
-        goto Invalid;
+        // Multiple GGUF files found - this is OK for LLaVA models
+        // Just pick the first one found and log a message
+        fprintf(stderr, "%s: note: multiple GGUF files found in ZIP\n", prog);
+        if (!fname) {
+            fprintf(stderr, "%s: selecting '%s' (use @filename to specify)\n", prog, zip_name);
+        }
     }
     strlcat(file->fname, "@", PATH_MAX);
     strlcat(file->fname, zip_name, PATH_MAX);
@@ -398,6 +400,10 @@ static void llamafile_close_impl(struct llamafile *file) {
     free(file);
 }

+const char *llamafile_name(struct llamafile *file) {
+    return file ? file->fname : NULL;
+}
+
 void llamafile_ref(struct llamafile *file) {
     atomic_fetch_add(&file->refs, 1);
 }
diff --git a/llamafile/llamafile.h b/llamafile/llamafile.h
index b74dda60dd..01f1f00e38 100644
--- a/llamafile/llamafile.h
+++ b/llamafile/llamafile.h
@@ -79,6 +79,7 @@
 bool llamafile_eof(struct llamafile *file);
 FILE *llamafile_fp(struct llamafile *);
 void llamafile_ref(struct llamafile *);
 void llamafile_unref(struct llamafile *);
+const char *llamafile_name(struct llamafile *);
 char *llamafile_get_prompt(void);
 void llamafile_govern(void);
diff --git a/package_llava.sh b/package_llava.sh
new file mode 100755
index 0000000000..0b254ffbb7
--- /dev/null
+++ b/package_llava.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+# Script to package LLaVA models into a single llamafile
+# This demonstrates the new multi-GGUF support
+
+set -e
+
+usage() {
+    cat << EOF
+Usage: $0 <language_model.gguf> <vision_encoder.gguf> <output.llamafile>
+
+Package a LLaVA model (language model + vision encoder) into a single llamafile.
+
+Arguments:
+    language_model.gguf    Path to the language model GGUF file (e.g., llava-v1.5-7b-q4.gguf)
+    vision_encoder.gguf    Path to the vision encoder GGUF file (e.g., mmproj-model-f16.gguf)
+    output.llamafile       Output llamafile name
+
+Example:
+    $0 llava-v1.5-7b-q4.gguf mmproj-model-f16.gguf llava-v1.5-7b.llamafile
+
+The resulting llamafile can be used without specifying --mmproj:
+    ./llava-v1.5-7b.llamafile --image photo.jpg -p "What's in this image?"
+EOF
+    exit 1
+}
+
+if [ $# -ne 3 ]; then
+    usage
+fi
+
+LANGUAGE_MODEL="$1"
+VISION_ENCODER="$2"
+OUTPUT_FILE="$3"
+
+# Check if input files exist
+if [ ! -f "$LANGUAGE_MODEL" ]; then
+    echo "Error: Language model file not found: $LANGUAGE_MODEL"
+    exit 1
+fi
+
+if [ ! -f "$VISION_ENCODER" ]; then
+    echo "Error: Vision encoder file not found: $VISION_ENCODER"
+    exit 1
+fi
+
+# Check if the zipalign binary exists (it is invoked from the build tree below)
+if [ ! -f "o/$(uname -m)/bin/zipalign" ]; then
+    echo "Error: zipalign not found. Please build it first."
+    echo "Run: make -j8"
+    exit 1
+fi
+
+# Check if llamafile binary exists
+if [ ! -f "o/$(uname -m)/bin/llamafile" ]; then
+    echo "Error: llamafile binary not found. Please build it first."
+    echo "Run: make -j8"
+    exit 1
+fi
+
+echo "Packaging LLaVA model..."
+echo "  Language model: $LANGUAGE_MODEL"
+echo "  Vision encoder: $VISION_ENCODER"
+echo "  Output file:    $OUTPUT_FILE"
+
+# Copy the llamafile binary
+cp "o/$(uname -m)/bin/llamafile" "$OUTPUT_FILE"
+
+# Use zipalign to add both GGUF files
+echo "Adding GGUF files to llamafile..."
+./o/$(uname -m)/bin/zipalign -j0 "$OUTPUT_FILE" \
+    "$LANGUAGE_MODEL" \
+    "$VISION_ENCODER"
+
+# Make it executable
+chmod +x "$OUTPUT_FILE"
+
+echo ""
+echo "Successfully created $OUTPUT_FILE!"
+echo ""
+echo "You can now use it without specifying --mmproj:"
+echo "  ./$OUTPUT_FILE --image photo.jpg -p \"What's in this image?\""
+echo ""
+echo "The vision encoder will be auto-detected from the embedded files."
\ No newline at end of file
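
A minimal end-to-end sketch of the workflow this patch enables. The model filenames are the placeholders from the script's own usage text, and the explicit @member override in the last command is an assumption inferred from the "use @filename to specify" note added to llamafile.c, not a documented flag syntax:

    # build the llamafile and zipalign binaries (as the script's error messages suggest)
    make -j8

    # package the language model and vision encoder into one llamafile
    ./package_llava.sh llava-v1.5-7b-q4.gguf mmproj-model-f16.gguf llava-v1.5-7b.llamafile

    # the vision encoder is auto-detected from the embedded GGUF files
    ./llava-v1.5-7b.llamafile --image photo.jpg -p "What's in this image?"

    # if auto-detection picks the wrong GGUF, pinning it via the @ member syntax
    # should also work (assumption, mirroring what auto_detect_mmproj constructs)
    ./llava-v1.5-7b.llamafile --mmproj llava-v1.5-7b.llamafile@mmproj-model-f16.gguf \
        --image photo.jpg -p "What's in this image?"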