2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
@@ -21,7 +21,7 @@ COPY . .
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
2 changes: 1 addition & 1 deletion .devops/intel.Dockerfile
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
echo "Building with dynamic libs" && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
2 changes: 1 addition & 1 deletion .devops/musa.Dockerfile
@@ -35,7 +35,7 @@ COPY . .
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
6 changes: 3 additions & 3 deletions .devops/rocm.Dockerfile
@@ -17,8 +17,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# gfx906 is deprecated
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

-#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
-ARG ROCM_DOCKER_ARCH=gfx1100
+ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+#ARG ROCM_DOCKER_ARCH=gfx1100

# Set nvcc architectured
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
@@ -40,7 +40,7 @@ WORKDIR /app
COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release \
+    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
2 changes: 1 addition & 1 deletion .devops/vulkan.Dockerfile
@@ -16,7 +16,7 @@ WORKDIR /app

COPY . .

-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
56 changes: 49 additions & 7 deletions convert_hf_to_gguf.py
@@ -65,6 +65,7 @@ class Model:
model_name: str | None
metadata_override: Path | None
dir_model_card: Path
remote_hf_model_id: str | None

# subclasses should define this!
model_arch: gguf.MODEL_ARCH
@@ -73,7 +74,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
use_temp_file: bool = False, eager: bool = False,
metadata_override: Path | None = None, model_name: str | None = None,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
small_first_shard: bool = False, hparams: dict[str, Any] | None = None):
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None):
if type(self) is Model:
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")

@@ -83,11 +84,24 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
self.is_big_endian = is_big_endian
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
self.use_temp_file = use_temp_file
self.lazy = not eager
self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
self.is_safetensors = len(self.part_names) > 0
if not self.is_safetensors:
self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
self.lazy = not eager or (remote_hf_model_id is not None)
self.remote_hf_model_id = remote_hf_model_id
if remote_hf_model_id is not None:
self.is_safetensors = True

def get_remote_tensors() -> Iterator[tuple[str, Tensor]]:
logger.info(f"Using remote model with HuggingFace id: {remote_hf_model_id}")
remote_tensors = gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id)
self.tensor_names = set(name for name in remote_tensors.keys())
for name, remote_tensor in gguf.utility.SafetensorRemote.get_list_tensors_hf_model(remote_hf_model_id).items():
yield (name, LazyTorchTensor.from_remote_tensor(remote_tensor))

self.get_tensors = get_remote_tensors
else:
self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
self.is_safetensors = len(self.part_names) > 0
if not self.is_safetensors:
self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -393,6 +407,10 @@ def prepare_metadata(self, vocab_only: bool):

self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model_card, self.model_name, total_params)

# If we are using HF model id, set the metadata name to the model id
if self.remote_hf_model_id:
self.metadata.name = self.remote_hf_model_id

# Fallback to model directory name if metadata name is still missing
if self.metadata.name is None:
self.metadata.name = self.dir_model.name
@@ -5403,6 +5421,14 @@ def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[:])
return cast(torch.Tensor, lazy)

@classmethod
def from_remote_tensor(cls, remote_tensor: gguf.utility.RemoteTensor):
dtype = cls._dtype_str_map[remote_tensor.dtype]
shape = remote_tensor.shape
meta = cls.meta_with_dtype_and_shape(dtype, shape)
lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape))
return cast(torch.Tensor, lazy)

@classmethod
def __torch_function__(cls, func, types, args=(), kwargs=None):
del types # unused
@@ -5480,6 +5506,10 @@ def parse_args() -> argparse.Namespace:
"--print-supported-models", action="store_true",
help="Print the supported models"
)
parser.add_argument(
"--remote", action="store_true",
help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token.",
)

args = parser.parse_args()
if not args.print_supported_models and args.model is None:
@@ -5520,6 +5550,14 @@ def main() -> None:

dir_model = args.model

if args.remote:
from huggingface_hub import snapshot_download
local_dir = snapshot_download(
repo_id=str(dir_model),
allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"])
dir_model = Path(local_dir)
logger.info(f"Downloaded config and tokenizer to {local_dir}")

if not dir_model.is_dir():
logger.error(f'Error: {args.model} is not a directory')
sys.exit(1)
@@ -5541,6 +5579,9 @@

if args.outfile is not None:
fname_out = args.outfile
elif args.remote:
# if remote, use the model ID as the output file name
fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf")
else:
fname_out = dir_model

@@ -5564,7 +5605,8 @@
metadata_override=args.metadata, model_name=args.model_name,
split_max_tensors=args.split_max_tensors,
split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
small_first_shard=args.no_tensor_first_split)
small_first_shard=args.no_tensor_first_split,
remote_hf_model_id=str(args.model) if args.remote else None)

if args.vocab_only:
logger.info("Exporting model vocab...")
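
The --remote path converts a model straight from the Hugging Face Hub: only the config and tokenizer files are snapshotted to disk, while tensor data is streamed lazily through gguf.utility.SafetensorRemote. A minimal invocation (an illustrative example only; gated repos additionally need HF_TOKEN set) would be:

    python convert_hf_to_gguf.py --remote HuggingFaceTB/SmolLM2-1.7B-Instruct

With no --outfile given, the output name is derived from the repo id, e.g. ./HuggingFaceTB-SmolLM2-1.7B-Instruct-{ftype}.gguf, with {ftype} substituted from the selected output type.
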
33 changes: 32 additions & 1 deletion examples/llava/CMakeLists.txt
@@ -1,3 +1,5 @@
# llava (legacy)

add_library(llava OBJECT
llava.cpp
llava.h
@@ -22,12 +24,41 @@ if (BUILD_SHARED_LIBS)
install(TARGETS llava_shared LIBRARY)
endif()

# mtmd

add_library(mtmd OBJECT
mtmd.cpp
mtmd.h
clip.cpp
clip.h
clip-impl.h
)

target_link_libraries(mtmd PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})

target_include_directories(mtmd PUBLIC .)
target_include_directories(mtmd PRIVATE ../..)
target_include_directories(mtmd PRIVATE ../../common) # for stb_image.h

target_compile_features(mtmd PRIVATE cxx_std_17)

add_library(mtmd_static STATIC $<TARGET_OBJECTS:mtmd>)
if (BUILD_SHARED_LIBS)
set_target_properties(mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mtmd PRIVATE LLAMA_SHARED LLAMA_BUILD)
add_library(mtmd_shared SHARED $<TARGET_OBJECTS:mtmd>)
target_link_libraries(mtmd_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS mtmd_shared LIBRARY)
endif()

if (NOT MSVC)
target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
target_compile_options(mtmd PRIVATE -Wno-cast-qual) # stb_image.h
endif()

if(TARGET BUILD_INFO)
add_dependencies(llava BUILD_INFO)
add_dependencies(mtmd BUILD_INFO)
endif()

set(TARGET llama-llava-cli)
@@ -55,7 +86,7 @@ set(TARGET llama-gemma3-cli)
add_executable(${TARGET} gemma3-cli.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-gemma3-cli)
install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common mtmd ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

set(TARGET llama-llava-clip-quantize-cli)
62 changes: 62 additions & 0 deletions examples/llava/clip-impl.h
@@ -1,12 +1,15 @@
#include "ggml.h"
#include "gguf.h"

#include "clip.h"

#include <climits>
#include <cstdarg>
#include <string>
#include <map>
#include <sstream>
#include <vector>
#include <memory>

// Internal header for clip.cpp

@@ -120,6 +123,23 @@ static projector_type clip_projector_type_from_string(const std::string & str) {
return PROJECTOR_TYPE_UNKNOWN;
}

// RGB uint8 image
struct clip_image_u8 {
int nx;
int ny;

std::vector<uint8_t> buf;
};

// RGB float32 image (NHWC)
// Memory layout: RGBRGBRGB...
struct clip_image_f32 {
int nx;
int ny;

std::vector<float> buf;
};

//
// logging
//
@@ -178,6 +198,28 @@ static void clip_log_internal(enum ggml_log_level level, const char * format, ..
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__)
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, __VA_ARGS__)

//
// cpp wrappers
//

struct clip_image_u8_deleter {
void operator()(clip_image_u8 * val) { clip_image_u8_free(val); }
};

struct clip_image_f32_deleter {
void operator()(clip_image_f32 * val) { clip_image_f32_free(val); }
};

struct clip_image_f32_batch_deleter {
void operator()(clip_image_f32_batch * val) { clip_image_f32_batch_free(val); }
};

typedef std::unique_ptr<clip_image_u8, clip_image_u8_deleter> clip_image_u8_ptr;
typedef std::unique_ptr<clip_image_f32, clip_image_f32_deleter> clip_image_f32_ptr;
typedef std::unique_ptr<clip_image_f32_batch, clip_image_f32_batch_deleter> clip_image_f32_batch_ptr;

// TODO @ngxson : we're currently having a naming clash between struct clip_image_size and function clip_image_size()

//
// common utils
//
@@ -214,6 +256,20 @@ static void string_replace_all(std::string & s, const std::string & search, cons
s = std::move(builder);
}

// split string by a `std::string delim` instead of `char delim`
static std::vector<std::string> string_split_str(std::string s, const std::string & delimiter) {
std::vector<std::string> tokens;
size_t pos = 0;
std::string token;
while ((pos = s.find(delimiter)) != std::string::npos) {
token = s.substr(0, pos);
tokens.push_back(token);
s.erase(0, pos + delimiter.length());
}
tokens.push_back(s);
return tokens;
}

//
// gguf utils
//
@@ -271,3 +327,9 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
}
}

//
// API used internally with mtmd
//

projector_type clip_get_projector_type(const struct clip_ctx * ctx);
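
The unique_ptr wrappers and string_split_str above are internal conveniences for the new mtmd code. A rough usage sketch (illustrative only, not code from this PR; the "<image>" marker is a made-up example):

    // illustrative sketch, not part of the diff
    #include "clip-impl.h"

    static void example_usage() {
        // RAII: clip_image_u8_free() runs automatically when img goes out of scope
        clip_image_u8_ptr img(clip_image_u8_init());

        // split on a multi-character delimiter, which a char-based splitter cannot handle
        std::vector<std::string> chunks = string_split_str("hello <image> world", "<image>");
        // chunks == {"hello ", " world"}
    }
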
33 changes: 16 additions & 17 deletions examples/llava/clip.cpp
@@ -32,23 +32,6 @@ struct clip_logger_state g_logger_state = {GGML_LOG_LEVEL_CONT, clip_log_callbac

//#define CLIP_DEBUG_FUNCTIONS

-// RGB uint8 image
-struct clip_image_u8 {
-    int nx;
-    int ny;
-
-    std::vector<uint8_t> buf;
-};
-
-// RGB float32 image (NHWC)
-// Memory layout: RGBRGBRGB...
-struct clip_image_f32 {
-    int nx;
-    int ny;
-
-    std::vector<float> buf;
-};

#ifdef CLIP_DEBUG_FUNCTIONS
static void clip_image_write_image_to_ppm(const clip_image_u8& img, const std::string& filename) {
std::ofstream file(filename, std::ios::binary);
@@ -1614,6 +1597,12 @@ struct clip_image_f32 * clip_image_f32_init() {
return new clip_image_f32();
}

unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny) {
if (nx) *nx = img->nx;
if (ny) *ny = img->ny;
return img->buf.data();
}

void clip_image_size_free(struct clip_image_size * load_image_size) {
if (load_image_size == nullptr) {
return;
@@ -2346,6 +2335,8 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i
int x_patch = img->nx / patch_size + (int)(img->nx % patch_size > 0);
int y_patch = img->ny / patch_size + (int)(img->ny % patch_size > 0);
n_patches = x_patch * y_patch;
} else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) {
n_patches = 256;
}

return n_patches;
@@ -2893,3 +2884,11 @@ bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img,
clip_image_encode(ctx, n_threads, &clip_img, vec);
return true;
}

//
// API used internally with mtmd
//

projector_type clip_get_projector_type(const struct clip_ctx * ctx) {
return ctx->proj_type;
}
3 changes: 3 additions & 0 deletions examples/llava/clip.h
@@ -77,6 +77,9 @@ CLIP_API struct clip_image_size * clip_image_size_init();
CLIP_API struct clip_image_u8 * clip_image_u8_init ();
CLIP_API struct clip_image_f32 * clip_image_f32_init();

// nx, ny are the output image dimensions
CLIP_API unsigned char * clip_image_u8_get_data(struct clip_image_u8 * img, uint32_t * nx, uint32_t * ny);

CLIP_API void clip_image_size_free (struct clip_image_size * img_size);
CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
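
clip_image_u8_get_data gives callers outside clip.cpp access to the raw pixel buffer now that the clip_image_u8 definition has moved into clip-impl.h. A hedged sketch of a caller (the image is assumed to have been filled elsewhere; this is not code from the PR):

    // illustrative sketch, not part of the diff
    #include "clip.h"
    #include <cstdint>
    #include <cstdio>

    static void print_image_info(struct clip_image_u8 * img) {
        uint32_t nx = 0;
        uint32_t ny = 0;
        unsigned char * rgb = clip_image_u8_get_data(img, &nx, &ny);
        // packed 8-bit RGB, so the buffer holds nx * ny * 3 bytes
        std::printf("image: %u x %u px (%u bytes at %p)\n", nx, ny, nx * ny * 3u, (void *) rgb);
    }
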