
Commit 8a04972

Merge pull request #59 from l3utterfly/master
merge from upstream
2 parents: 098ac44 + 53af4db


67 files changed: 3211 additions and 1071 deletions

.github/workflows/build.yml

Lines changed: 29 additions & 0 deletions
@@ -676,6 +676,35 @@ jobs:
             -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
 
+  macOS-latest-cmake-visionos:
+    runs-on: macos-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=visionOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
   macOS-latest-swift:
     runs-on: macos-latest

build-xcframework.sh

Lines changed: 4 additions & 4 deletions
@@ -432,8 +432,8 @@ cmake -B build-visionos -G Xcode \
     -DCMAKE_SYSTEM_NAME=visionOS \
     -DCMAKE_OSX_SYSROOT=xros \
     -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
+    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -S .
 cmake --build build-visionos --config Release -- -quiet

@@ -445,8 +445,8 @@ cmake -B build-visionos-sim -G Xcode \
     -DCMAKE_SYSTEM_NAME=visionOS \
     -DCMAKE_OSX_SYSROOT=xrsimulator \
     -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
-    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
-    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
+    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
+    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
     -S .
 cmake --build build-visionos-sim --config Release -- -quiet

ci/README.md

Lines changed: 39 additions & 0 deletions
@@ -26,4 +26,43 @@ GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 # with SYCL support
 source /opt/intel/oneapi/setvars.sh
 GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with MUSA support
+GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
+
+## Running MUSA CI in a Docker Container
+
+Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
+
+### 1. Create a local directory to store cached models, configuration files and venv:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-cache
+```
+
+### 2. Create a local directory to store CI run results:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-results
+```
+
+### 3. Start a Docker container and run the CI:
+
+```bash
+docker run --privileged -it \
+    -v $HOME/llama.cpp/ci-cache:/ci-cache \
+    -v $HOME/llama.cpp/ci-results:/ci-results \
+    -v $PWD:/ws -w /ws \
+    mthreads/musa:rc3.1.1-devel-ubuntu22.04
 ```
+
+Inside the container, execute the following commands:
+
+```bash
+apt update -y && apt install -y cmake git python3.10-venv wget
+git config --global --add safe.directory /ws
+GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
+```
+
+This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
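The three README steps above can also be collapsed into one unattended run. The sketch below simply chains the documented commands with `bash -c`; the image tag and mount paths are taken from the README text above, and nothing new is introduced:

```bash
docker run --privileged --rm \
    -v $HOME/llama.cpp/ci-cache:/ci-cache \
    -v $HOME/llama.cpp/ci-results:/ci-results \
    -v $PWD:/ws -w /ws \
    mthreads/musa:rc3.1.1-devel-ubuntu22.04 \
    bash -c "apt update -y && apt install -y cmake git python3.10-venv wget && \
             git config --global --add safe.directory /ws && \
             GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache"
```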

ci/run.sh

Lines changed: 24 additions & 6 deletions
@@ -16,6 +16,9 @@
 # # with VULKAN support
 # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 #
+# # with MUSA support
+# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
 
 if [ -z "$2" ]; then
     echo "usage: $0 <output-dir> <mnt-dir>"

@@ -52,13 +55,22 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         echo "source /opt/intel/oneapi/setvars.sh"
         exit 1
     fi
-
+    # Use only main GPU
+    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
+    # Enable sysman for correct memory reporting
+    export ZES_ENABLE_SYSMAN=1
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi
 
 if [ ! -z ${GG_BUILD_VULKAN} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
 fi
+
+if [ ! -z ${GG_BUILD_MUSA} ]; then
+    # Use qy1 by default (MTT S80)
+    MUSA_ARCH=${MUSA_ARCH:-21}
+    CMAKE_EXTRA="-DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
+fi
 
 ## helpers
 
 # download a file if it does not exist or if it is outdated

@@ -808,7 +820,7 @@ export LLAMA_LOG_PREFIX=1
 export LLAMA_LOG_TIMESTAMPS=1
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
-    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
+    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models
     rm -rf ${SRC}/models-mnt
     mnt_models=${MNT}/models
    mkdir -p ${mnt_models}

@@ -826,16 +838,20 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 fi
 
 ret=0
-
-test $ret -eq 0 && gg_run ctest_debug
+if [ -z ${GG_BUILD_SYCL} ]; then
+    # SYCL build breaks with debug build flags
+    test $ret -eq 0 && gg_run ctest_debug
+fi
 test $ret -eq 0 && gg_run ctest_release
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run embd_bge_small
     test $ret -eq 0 && gg_run rerank_tiny
 
     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
-        test $ret -eq 0 && gg_run test_scripts_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run test_scripts_debug
+        fi
         test $ret -eq 0 && gg_run test_scripts_release
     fi
 

@@ -846,7 +862,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
             test $ret -eq 0 && gg_run pythia_2_8b
             #test $ret -eq 0 && gg_run open_llama_7b_v2
         fi
-        test $ret -eq 0 && gg_run ctest_with_model_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run ctest_with_model_debug
+        fi
         test $ret -eq 0 && gg_run ctest_with_model_release
     fi
 fi
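Because the new MUSA block uses `MUSA_ARCH=${MUSA_ARCH:-21}`, the target architecture can be overridden from the environment; 21 (qy1 / MTT S80) is only the default. A minimal sketch (the value 22 below is purely illustrative, not something this commit prescribes):

```bash
# Override the default MUSA architecture (21 = qy1 / MTT S80) for a different GPU.
MUSA_ARCH=22 GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```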

convert_hf_to_gguf.py

Lines changed: 44 additions & 25 deletions
@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
             missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
             if len(extra) == 0 and len(missing_files) > 0:
-                raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                 f"Missing tensors: {missing}")
             else:
                 raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                  f"Missing tensors: {missing}\n"

@@ -528,6 +529,8 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
         added_vocab = tokenizer.get_added_vocab()
 
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
         for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")

@@ -537,13 +540,13 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
                 if token in added_vocab:
                     # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
                     # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not tokenizer.added_tokens_decoder[i].normalized:
+                    if not added_tokens_decoder[i].normalized:
                         previous_token = token
                         token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
                         if previous_token != token:
                             logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
 
-                    if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         # NOTE: this was added for Gemma.

@@ -702,6 +705,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
             # ref: https://huggingface.co/Xenova/gpt-4o
             res = "gpt-4o"
+        if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
+            # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
+            res = "superbpe"
 
         if res is None:
             logger.warning("\n")

@@ -1099,13 +1105,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((self.map_tensor_name(name), data_torch))
 
-        if name == "word_embeddings.weight":
-            assert self.tensor_names is not None
-
-            # TODO: tie them at runtime, don't duplicate in the model file
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 

@@ -1747,6 +1746,25 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("Mistral3ForConditionalGeneration")
+class Mistral3Model(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    # we need to merge the text_config into the root level of hparams
+    def __init__(self, *args, **kwargs):
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
+        if "text_config" in hparams:
+            hparams = {**hparams, **hparams["text_config"]}
+        kwargs["hparams"] = hparams
+        super().__init__(*args, **kwargs)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        name = name.replace("language_model.", "")
+        if "multi_modal_projector" in name or "vision_tower" in name:
+            return []
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @Model.register("DeciLMForCausalLM")
 class DeciModel(Model):
     model_arch = gguf.MODEL_ARCH.DECI

@@ -2404,10 +2422,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((new_name, data_torch))
 
-        # note: GPT2 output is tied to (same as) wte in original model
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 

@@ -2737,21 +2751,26 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    _has_tok_embd = False
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        new_name = self.map_tensor_name(name)
-
-        tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            assert self.tensor_names is not None
+        new_name = self.map_tensor_name(name)
 
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                # copy tok_embd.weight to output.weight
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if self.tensor_names and "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True
 
-        return tensors
+        return [(new_name, data_torch)]
 
 
 @Model.register("InternLM2ForCausalLM")

@@ -3366,7 +3385,7 @@ class Gemma3Model(Model):
 
     # we need to merge the text_config into the root level of hparams
     def __init__(self, *args, **kwargs):
-        hparams = Model.load_hparams(kwargs["dir_model"])
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
         if "text_config" in hparams:
             hparams = {**hparams, **hparams["text_config"]}
         kwargs["hparams"] = hparams

@@ -5339,7 +5358,7 @@ def main() -> None:
             logger.error(f"Model {model_architecture} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out,
+        model_instance = model_class(dir_model, output_type, fname_out,
                                      is_big_endian=args.bigendian, use_temp_file=args.use_temp_file,
                                      eager=args.no_lazy,
                                      metadata_override=args.metadata, model_name=args.model_name,
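With `Mistral3ForConditionalGeneration` registered above, a text-only GGUF can be produced the same way as for other LLaMA-architecture models; `modify_tensors` drops the `vision_tower` and `multi_modal_projector` tensors. A hedged usage sketch — the checkpoint path is a placeholder, and `--outfile`/`--outtype` are the script's existing options:

```bash
# Convert a local Mistral Small 3.1 checkout (path is hypothetical) to an f16 GGUF.
python convert_hf_to_gguf.py /path/to/Mistral-Small-3.1-24B-Instruct \
    --outfile mistral-small-3.1-f16.gguf --outtype f16
```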

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -110,6 +110,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
     {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
+    {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
 ]
 
 
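The new `superbpe` entry is consumed by this update script, which downloads each listed tokenizer and regenerates the `chkhsh` branches in `convert_hf_to_gguf.py` (including the `7dec860...` hash added above). A sketch of the usual invocation, assuming a Hugging Face access token for any gated repositories in the list:

```bash
# <HF_TOKEN> is a placeholder for a Hugging Face access token.
python convert_hf_to_gguf_update.py <HF_TOKEN>
```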