
Commit 6b7d234

Rewrite history to fix bad Vulkan shader commits without increasing repo size

added dpe colab (+8 squashed commits)

Squashed commit:
- [b8362da] updated lite
- [ed6c037] move nsigma into the regular sampler stack
- [ac5f61c] relative filepath fixed
- [05fe96a] export template
- [ed0a5a3] nix_example.md: refactor (#1401)
  - nix_example.md: add override example
  - nix_example.md: drop graphics example, already basic NixOS knowledge
  - nix_example.md: format
  - nix_example.md: Vulkan is disabled on macOS (disabled in NixOS/nixpkgs@1ccd253)
  - nix_examples.md: nixpkgs.config.cuda{Arches -> Capabilities} (fixes #1367)
- [675c62f] AutoGuess: Phi 4 (mini) (#1402)
- [4bf5698] phrasing
- [b8c0df0] Add Rep Pen to Top N Sigma sampler chain (#1397): place after nsigma and before xtc

(+3 squashed commits)

Squashed commit:
- [87c52b9] disable VMM from HIP
- [ee8906f] edit description
- [e85c0e6] Remove Unnecessary Rep Counting (#1394)
  - stop counting reps
  - fix range-based initializer
  - strike that - reverse it
1 parent: 50eae1f
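Two of the squashed changes above concern sampler ordering: nsigma was moved into the regular sampler stack, and repetition penalty was placed after Top N Sigma and before XTC. The sketch below illustrates what that ordering means; the sampler implementations are simplified stand-ins, not KoboldCpp's actual code.

```python
import numpy as np

def top_nsigma(logits: np.ndarray, n: float = 1.0) -> np.ndarray:
    """Simplified Top N Sigma: keep logits within n standard deviations
    of the maximum logit, mask everything else to -inf."""
    threshold = logits.max() - n * logits.std()
    return np.where(logits >= threshold, logits, -np.inf)

def rep_pen(logits: np.ndarray, seen: set, penalty: float = 1.1) -> np.ndarray:
    """Simplified CTRL-style repetition penalty applied to tokens that
    already appeared in the context."""
    out = logits.copy()
    for tok in seen:
        out[tok] = out[tok] / penalty if out[tok] > 0 else out[tok] * penalty
    return out

# Ordering described by the squashed commits: nsigma first, then rep pen;
# XTC and the final sampling step would follow after both.
logits = np.array([2.0, 1.5, 0.2, -1.0])
logits = top_nsigma(logits, n=1.0)
logits = rep_pen(logits, seen={0})
```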

File tree

114 files changed (+6933, -2909 lines changed)


Makefile

Lines changed: 1 addition & 1 deletion
```diff
@@ -235,7 +235,7 @@ else
 HCC := $(ROCM_PATH)/llvm/bin/clang
 HCXX := $(ROCM_PATH)/llvm/bin/clang++
 endif
-HIPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
+HIPFLAGS += -DGGML_USE_HIP -DGGML_HIP_NO_VMM -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 HIPLDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
 HIPLDFLAGS += -lhipblas -lamdhip64 -lrocblas
```

colab.ipynb

Lines changed: 1 addition & 1 deletion
```diff
@@ -48,7 +48,7 @@
 "source": [
 "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\n",
 "\n",
-"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/Llama-3.1-8B-BookAdventures-GGUF/resolve/main/Llama-3.1-8B-BookAdventures.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/TheDrummer_Cydonia-24B-v2-GGUF/resolve/main/TheDrummer_Cydonia-24B-v2-Q4_K_S.gguf\"]{allow-input: true}\n",
+"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/Llama-3.1-8B-BookAdventures-GGUF/resolve/main/Llama-3.1-8B-BookAdventures.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/TheDrummer_Cydonia-24B-v2-GGUF/resolve/main/TheDrummer_Cydonia-24B-v2-Q4_K_S.gguf\",\"https://huggingface.co/bartowski/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b-GGUF/resolve/main/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b-IQ4_XS.gguf\"]{allow-input: true}\n",
 "Layers = 99 #@param [99]{allow-input: true}\n",
 "ContextSize = 4096 #@param [4096,8192] {allow-input: true}\n",
 "FlashAttention = True #@param {type:\"boolean\"}\n",
```

convert_hf_to_gguf.py

Lines changed: 8 additions & 3 deletions
```diff
@@ -699,6 +699,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
             # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
             res = "deepseek-r1-qwen"
+        if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
+            # ref: https://huggingface.co/Xenova/gpt-4o
+            res = "gpt-4o"
 
         if res is None:
             logger.warning("\n")
```
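For context on what these chkhsh branches key off: get_vocab_base_pre fingerprints a tokenizer by hashing how it encodes a fixed probe text, so each digest identifies one pre-tokenizer variant. A minimal sketch of the idea follows; the probe string here is a hypothetical stand-in (the script's real probe text is longer and deliberately adversarial), and it assumes the repo loads with transformers' AutoTokenizer.

```python
from hashlib import sha256

from transformers import AutoTokenizer  # assumption: transformers is installed

probe_text = "Hello world! \u00e9\u00e8 123 ..."  # hypothetical probe string
tokenizer = AutoTokenizer.from_pretrained("Xenova/gpt-4o")
token_ids = tokenizer.encode(probe_text)

# Hash the token-id sequence: two tokenizers that split the probe text
# identically produce the same digest, so the digest names the pre-tokenizer.
chkhsh = sha256(str(token_ids).encode()).hexdigest()
print(chkhsh)
```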
```diff
@@ -2512,7 +2515,8 @@ def set_gguf_parameters(self):
         rms_eps = self.find_hparam(["rms_norm_eps"])
         max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
-        rope_dims = n_embd // n_head
+        rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
+        rope_dims = int(rot_pct * n_embd) // n_head
 
         self.gguf_writer.add_context_length(max_pos_embds)
         self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
```
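The fix above accounts for models that apply rotary embeddings to only a fraction of each head's dimensions. A worked example with hypothetical hparams (illustrative values, not read from any real config.json):

```python
# Hypothetical config values for a model with partial rotary embeddings.
hparams = {"hidden_size": 3072, "num_attention_heads": 24, "partial_rotary_factor": 0.75}

n_embd = hparams["hidden_size"]
n_head = hparams["num_attention_heads"]

# Old computation: assumes every head dimension is rotated.
rope_dims_old = n_embd // n_head                      # 3072 // 24 = 128

# New computation: only the partial_rotary_factor fraction is rotated.
rot_pct = hparams.get("partial_rotary_factor", 1.0)
rope_dims_new = int(rot_pct * n_embd) // n_head       # 2304 // 24 = 96

print(rope_dims_old, rope_dims_new)
```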
```diff
@@ -2536,7 +2540,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         n_head = self.find_hparam(["num_attention_heads", "n_head"])
         max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
-        rope_dims = n_embd // n_head
+        rot_pct = self.hparams.get("partial_rotary_factor", 1.0)
+        rope_dims = int(rot_pct * n_embd) // n_head
 
         # write rope scaling for long context (128k) model
         rope_scaling = self.find_hparam(['rope_scaling'], True)
```
```diff
@@ -2565,7 +2570,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
 
         if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
-            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}. long_factors = {len(long_factors)}, short_factors = {len(short_factors)}.')
 
         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
         yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
```
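The improved ValueError message now reports the actual list lengths alongside the expected value. The invariant being checked: each rotated dimension pair gets one long-context and one short-context scaling factor, so both lists must contain rope_dims / 2 entries. A self-contained sketch of the check, with hypothetical values:

```python
# Each rotated dimension pair needs one long and one short scaling factor,
# so both factor lists must have rope_dims / 2 entries. Values hypothetical.
rope_dims = 96
long_factors = [1.0] * 48    # ok: 96 / 2 entries
short_factors = [1.0] * 47   # one entry short -> triggers the ValueError

if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
    raise ValueError(
        f"The length of rope long and short factors must be {rope_dims / 2}. "
        f"long_factors = {len(long_factors)}, short_factors = {len(short_factors)}."
    )
```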

convert_hf_to_gguf_update.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -109,6 +109,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
+    {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
 ]
 
 
```
```diff
@@ -131,6 +132,10 @@ def download_model(model):
 
     files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
 
+    if name == "gpt-4o":
+        # Xenova/gpt-4o is tokenizer-only, it does not contain config.json
+        files = ["tokenizer.json", "tokenizer_config.json"]
+
     if tokt == TOKENIZER_TYPE.SPM:
         files.append("tokenizer.model")
 
```
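This special case exists because Xenova/gpt-4o publishes only tokenizer files, so requesting config.json from it would fail. A minimal sketch of the same file selection using huggingface_hub directly (an assumption for illustration; the update script's own download helper is not shown here):

```python
from huggingface_hub import hf_hub_download  # assumption: huggingface_hub is installed

name = "gpt-4o"
repo_id = "Xenova/gpt-4o"

files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
if name == "gpt-4o":
    # Xenova/gpt-4o ships only the tokenizer; requesting config.json would 404.
    files = ["tokenizer.json", "tokenizer_config.json"]

for filename in files:
    # Downloads each file into the local HF cache and returns its path.
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    print(path)
```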
