Commit b7420c0

Merge remote-tracking branch 'ggerganov/master' into nsigma
2 parents: 9152659 + b34c859


63 files changed: +1448 −729 lines

.github/workflows/build.yml

Lines changed: 26 additions & 45 deletions
@@ -771,7 +771,7 @@ jobs:
         uses: hendrikmuhs/[email protected]
         with:
           key: windows-msys2
-          variant: sccache
+          variant: ccache
           evict-old-files: 1d

       - name: Setup ${{ matrix.sys }}

@@ -814,26 +814,18 @@ jobs:
     strategy:
       matrix:
         include:
-          - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
-          - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
-          - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
-          - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
+          - build: 'cpu-x64'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
           - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
-          - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
           - build: 'llvm-arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
-          - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'llvm-arm64-opencl-adreno'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
+          # - build: 'kompute-x64'
+          #   defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'

     steps:
       - name: Clone

@@ -846,7 +838,7 @@ jobs:
         uses: hendrikmuhs/[email protected]
         with:
           key: windows-latest-cmake-${{ matrix.build }}
-          variant: sccache
+          variant: ccache
           evict-old-files: 1d

       - name: Clone Kompute submodule

@@ -922,39 +914,26 @@ jobs:
           cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
           cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

-      - name: Check AVX512F support
-        id: check_avx512f
-        if: ${{ matrix.build == 'avx512-x64' }}
-        continue-on-error: true
-        run: |
-          cd build
-          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
-          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
-          $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
-          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
-          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
-          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
-
       - name: Test
         id: cmake_test
-        # not all machines have native AVX-512
-        if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
+        if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
        run: |
          cd build
          ctest -L main -C Release --verbose --timeout 900

-      - name: Test (Intel SDE)
-        id: cmake_test_sde
-        if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
-        run: |
-          curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
-          # for some weird reason windows tar doesn't like sde tar.xz
-          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
-          7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
-          $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
-          cd build
-          $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
-          & $sde -future -- ctest -L main -C Release --verbose --timeout 900
+      # TODO: disabled for now, consider adding tests for all CPU variants instead
+      # - name: Test (Intel SDE)
+      #   id: cmake_test_sde
+      #   if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
+      #   run: |
+      #     curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
+      #     # for some weird reason windows tar doesn't like sde tar.xz
+      #     7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
+      #     7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
+      #     $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
+      #     cd build
+      #     $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
+      #     & $sde -future -- ctest -L main -C Release --verbose --timeout 900

       - name: Determine tag name
         id: tag

@@ -1039,7 +1018,7 @@ jobs:
         uses: hendrikmuhs/[email protected]
         with:
           key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
-          variant: sccache
+          variant: ccache
           evict-old-files: 1d

       - name: Install Cuda Toolkit 11.7

@@ -1117,6 +1096,8 @@ jobs:
           cmake -S . -B build -G "Ninja Multi-Config" ^
             -DLLAMA_BUILD_SERVER=ON ^
             -DGGML_NATIVE=OFF ^
+            -DGGML_BACKEND_DL=ON ^
+            -DGGML_CPU_ALL_VARIANTS=ON ^
             -DGGML_CUDA=ON ^
             -DGGML_RPC=ON ^
             -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include"

@@ -1191,7 +1172,7 @@ jobs:
         uses: hendrikmuhs/[email protected]
         with:
           key: windows-latest-cmake-sycl
-          variant: sccache
+          variant: ccache
           evict-old-files: 1d

       - name: Install

Makefile

Lines changed: 19 additions & 19 deletions
@@ -1394,36 +1394,36 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-libllava.a: tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+libllava.a: tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	common/stb_image.h \
 	common/base64.hpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual

-llama-llava-cli: tools/llava/llava-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-llava-cli: tools/mtmd/llava-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

-llama-minicpmv-cli: tools/llava/minicpmv-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-minicpmv-cli: tools/mtmd/minicpmv-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

-llama-qwen2vl-cli: tools/llava/qwen2vl-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-qwen2vl-cli: tools/mtmd/qwen2vl-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

cmake/arm64-windows-msvc.cmake

Lines changed: 0 additions & 6 deletions
This file was deleted.

cmake/x64-windows-llvm.cmake

Lines changed: 0 additions & 6 deletions
@@ -3,9 +3,3 @@ set( CMAKE_SYSTEM_PROCESSOR x86_64 )

 set( CMAKE_C_COMPILER clang )
 set( CMAKE_CXX_COMPILER clang++ )
-
-set( arch_c_flags "-march=native" )
-
-set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
-set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
-

common/arg.cpp

Lines changed: 2 additions & 2 deletions
@@ -2211,14 +2211,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file. see tools/llava/README.md",
+        "path to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
     ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file. see tools/llava/README.md",
+        "URL to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }

common/common.h

Lines changed: 1 addition & 1 deletion
@@ -342,7 +342,7 @@ struct common_params {

     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;

-    // multimodal models (see tools/llava)
+    // multimodal models (see tools/mtmd)
     struct common_params_model mmproj;
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false;     // explicitly disable multimodal model

convert_hf_to_gguf.py

Lines changed: 19 additions & 2 deletions
@@ -1778,6 +1778,12 @@ class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # fix for SmolVLM2, missing `num_attention_heads` in config.json
+        if self.hf_arch == "VLlama3ForCausalLM":
+            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
+
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()

@@ -2123,6 +2129,9 @@ def __init__(self, *args, **kwargs):
         # if n_heads_in_group is not None, then
         # _num_kv_heads[il] is num_attention_head // n_heads_in_group and
         # _num_heads[il] is num_attention_head
+        # ***dummy layer*** for nemotron 253B
+        # if n_heads_in_group is None and ffn_mult is None
+        # then _num_kv_heads[il] is 0 and _num_heads[il] is 0 and _ffn_dims is 0
         for il in range(len(_block_configs)):
             if _block_configs[il]["attention"]["n_heads_in_group"] is None:
                 if _block_configs[il]["attention"]["replace_with_linear"] is True:

@@ -2134,7 +2143,10 @@ def __init__(self, *args, **kwargs):
             else:
                 self._num_kv_heads.append(self.hparams["num_attention_heads"] // _block_configs[il]["attention"]["n_heads_in_group"])
                 self._num_heads.append(self.hparams["num_attention_heads"])
-            _ffn_multipliers.append(_block_configs[il]["ffn"]["ffn_mult"])
+            if _block_configs[il]["ffn"]["ffn_mult"] is None: # dummy layer
+                _ffn_multipliers.append(0.0)
+            else:
+                _ffn_multipliers.append(_block_configs[il]["ffn"]["ffn_mult"])
         assert self.block_count == len(self._num_kv_heads)
         assert self.block_count == len(self._num_heads)
         assert self.block_count == len(_ffn_multipliers)

@@ -5674,7 +5686,12 @@ def set_gguf_parameters(self):
         rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]

         self.gguf_writer.add_rope_dimension_count(rope_dim)
-        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+        if (self.hparams.get("rope_scaling") or {}).get("type") == "yarn" and "factor" in self.hparams["rope_scaling"]:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        else:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
         self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
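The rope-scaling hunk above only writes YARN metadata when the checkpoint's `rope_scaling` block declares `type: yarn` and carries a `factor`; otherwise it keeps writing `NONE` as before. Below is a minimal standalone sketch of that decision, using a hypothetical `hparams` dict and plain string keys instead of the real `gguf_writer` calls and GGUF key names:

```python
# Standalone sketch of the YARN detection added above. The hparams dicts and the
# returned key names are illustrative only; the real script calls self.gguf_writer.
def rope_scaling_kv(hparams: dict) -> dict:
    """Return the rope-scaling key/values the converter would emit for this config."""
    rope_scaling = hparams.get("rope_scaling") or {}
    if rope_scaling.get("type") == "yarn" and "factor" in rope_scaling:
        return {
            "rope_scaling.type": "yarn",
            "rope_scaling.factor": rope_scaling["factor"],
            "rope_scaling.original_context_length": rope_scaling["original_max_position_embeddings"],
        }
    return {"rope_scaling.type": "none"}

# config.json fragment that takes the YARN branch:
print(rope_scaling_kv({"rope_scaling": {"type": "yarn", "factor": 4.0,
                                        "original_max_position_embeddings": 4096}}))
# -> {'rope_scaling.type': 'yarn', 'rope_scaling.factor': 4.0, 'rope_scaling.original_context_length': 4096}

# config.json with no rope_scaling block falls back to "none":
print(rope_scaling_kv({}))
# -> {'rope_scaling.type': 'none'}
```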

docs/multimodal/MobileVLM.md

Lines changed: 6 additions & 6 deletions
@@ -33,21 +33,21 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
 2. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:

 ```sh
-python ./tools/llava/llava_surgery.py -m path/to/MobileVLM-1.7B
+python ./tools/mtmd/llava_surgery.py -m path/to/MobileVLM-1.7B
 ```

 3. Use `convert_image_encoder_to_gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF:

 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py \
+python ./tools/mtmd/convert_image_encoder_to_gguf.py \
     -m path/to/clip-vit-large-patch14-336 \
     --llava-projector path/to/MobileVLM-1.7B/llava.projector \
     --output-dir path/to/MobileVLM-1.7B \
     --projector-type ldp
 ```

 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py \
+python ./tools/mtmd/convert_image_encoder_to_gguf.py \
     -m path/to/clip-vit-large-patch14-336 \
     --llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \
     --output-dir path/to/MobileVLM-1.7B_V2 \

@@ -69,10 +69,10 @@ Now both the LLaMA part and the image encoder is in the `MobileVLM-1.7B` directo

 ## Android compile and run
 ### compile
-refer to `tools/llava/android/build_64.sh`
+refer to `tools/mtmd/android/build_64.sh`
 ```sh
-mkdir tools/llava/android/build_64
-cd tools/llava/android/build_64
+mkdir tools/mtmd/android/build_64
+cd tools/mtmd/android/build_64
 ../build_64.sh
 ```
 ### run on Android

docs/multimodal/glmedge.md

Lines changed: 2 additions & 2 deletions
@@ -25,13 +25,13 @@ git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/T
 2. Use `glmedge-surgery.py` to split the GLMV-EDGE model to LLM and multimodel projector constituents:

 ```sh
-python ./tools/llava/glmedge-surgery.py -m ../model_path
+python ./tools/mtmd/glmedge-surgery.py -m ../model_path
 ```

 4. Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF:

 ```sh
-python ./tools/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
+python ./tools/mtmd/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path
 ```

 5. Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF:

docs/multimodal/llava.md

Lines changed: 6 additions & 6 deletions
@@ -37,19 +37,19 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
 2. Install the required Python packages:

 ```sh
-pip install -r tools/llava/requirements.txt
+pip install -r tools/mtmd/requirements.txt
 ```

 3. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:

 ```sh
-python ./tools/llava/llava_surgery.py -m ../llava-v1.5-7b
+python ./tools/mtmd/llava_surgery.py -m ../llava-v1.5-7b
 ```

 4. Use `convert_image_encoder_to_gguf.py` to convert the LLaVA image encoder to GGUF:

 ```sh
-python ./tools/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
+python ./tools/mtmd/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
 ```

 5. Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF:

@@ -69,12 +69,12 @@ git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
 2) Install the required Python packages:

 ```sh
-pip install -r tools/llava/requirements.txt
+pip install -r tools/mtmd/requirements.txt
 ```

 3) Use `llava_surgery_v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
 ```console
-python tools/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
+python tools/mtmd/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/
 ```
 - you will find a llava.projector and a llava.clip file in your model directory

@@ -88,7 +88,7 @@ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.jso

 5) Create the visual gguf model:
 ```console
-python ./tools/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
+python ./tools/mtmd/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
 ```
 - This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP
