
Commit d357ef5

Merge branch 'master' into numa-mirror
2 parents c665d3c + b8e09f0 commit d357ef5

30 files changed: +1135 -1174 lines changed

.devops/rocm.Dockerfile

Lines changed: 4 additions & 4 deletions
@@ -4,7 +4,7 @@ ARG UBUNTU_VERSION=24.04
 ARG ROCM_VERSION=6.4
 ARG AMDGPU_VERSION=6.4
 
-# Target the CUDA build image
+# Target the ROCm build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
 
 ### Build image
@@ -15,12 +15,12 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # This is mostly tied to rocBLAS supported archs.
 # gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
 # gfx906 is deprecated
-#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
+#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
 
-ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201'
 #ARG ROCM_DOCKER_ARCH=gfx1100
 
-# Set nvcc architectured
+# Set ROCm architectured
 ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
 # ENV CC=/opt/rocm/llvm/bin/clang

.github/workflows/build.yml

Lines changed: 10 additions & 7 deletions
@@ -127,7 +127,8 @@ jobs:
             -DCMAKE_BUILD_RPATH="@loader_path" \
             -DLLAMA_FATAL_WARNINGS=ON \
             -DGGML_METAL=OFF \
-            -DGGML_RPC=ON
+            -DGGML_RPC=ON \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -1051,9 +1052,13 @@ jobs:
         run: examples/sycl/win-build-sycl.bat
 
   windows-latest-cmake-hip:
-    if: ${{ github.event.inputs.create_release != 'true' }}
     runs-on: windows-2022
 
+    env:
+      # The ROCm version must correspond to the version used in the HIP SDK.
+      ROCM_VERSION: "6.4.2"
+      HIPSDK_INSTALLER_VERSION: "25.Q3"
+
     steps:
       - name: Clone
        id: checkout
@@ -1062,24 +1067,22 @@ jobs:
      - name: Clone rocWMMA repository
        id: clone_rocwmma
        run: |
-          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+          git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
 
      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v4
        with:
          path: C:\Program Files\AMD\ROCm
-          key: rocm-6.1-${{ runner.os }}-v1
-          restore-keys: |
-            rocm-6.1-${{ runner.os }}-
+          key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
 
      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          $completed = $proc.WaitForExit(600000)

.github/workflows/release.yml

Lines changed: 15 additions & 8 deletions
@@ -108,7 +108,8 @@ jobs:
             -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
             -DLLAMA_FATAL_WARNINGS=ON \
             -DGGML_METAL=OFF \
-            -DGGML_RPC=ON
+            -DGGML_RPC=ON \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
      - name: Determine tag name
@@ -528,11 +529,16 @@ jobs:
  windows-hip:
    runs-on: windows-2022
 
+    env:
+      # The ROCm version must correspond to the version used in the HIP SDK.
+      ROCM_VERSION: "6.4.2"
+      HIPSDK_INSTALLER_VERSION: "25.Q3"
+
    strategy:
      matrix:
        include:
          - name: "radeon"
-            gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
+            gpu_targets: "gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
 
    steps:
      - name: Clone
@@ -542,21 +548,19 @@ jobs:
      - name: Clone rocWMMA repository
        id: clone_rocwmma
        run: |
-          git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1
+          git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
 
      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v4
        with:
          path: C:\Program Files\AMD\ROCm
-          key: rocm-6.1-${{ runner.os }}-v1
-          restore-keys: |
-            rocm-6.1-${{ runner.os }}-
+          key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
 
      - name: ccache
        uses: ggml-org/[email protected]
        with:
-          key: windows-latest-cmake-hip-${{ matrix.name }}-x64
+          key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
          evict-old-files: 1d
 
      - name: Install ROCm
@@ -565,7 +569,7 @@ jobs:
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
-          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          $completed = $proc.WaitForExit(600000)
@@ -610,9 +614,12 @@ jobs:
            -DLLAMA_CURL=OFF
          cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
          md "build\bin\rocblas\library\"
+          md "build\bin\hipblaslt\library"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
+          cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"
 
      - name: Pack artifacts
        id: pack_artifacts

common/common.h

Lines changed: 3 additions & 3 deletions
@@ -292,9 +292,9 @@ struct common_params {
     float rope_freq_base = 0.0f; // RoPE base frequency
     float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
     float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor
-    float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor
-    float yarn_beta_fast = 32.0f; // YaRN low correction dim
-    float yarn_beta_slow = 1.0f; // YaRN high correction dim
+    float yarn_attn_factor = -1.0f; // YaRN magnitude scaling factor
+    float yarn_beta_fast = -1.0f; // YaRN low correction dim
+    float yarn_beta_slow = -1.0f; // YaRN high correction dim
     int32_t yarn_orig_ctx = 0; // YaRN original context length
 
     // offload params
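The three YaRN defaults above move from concrete values (1.0, 32.0, 1.0) to -1.0f, i.e. "not set by the user". A minimal sketch of how such a sentinel can be resolved, assuming an explicit command-line value wins over model metadata (such as the rope_scaling_yarn_* keys the converter writes below), which in turn wins over the old hard-coded defaults; the function and names here are illustrative, not the llama.cpp API:

# Illustrative only: a "-1 means unset" resolution scheme for YaRN parameters.
LEGACY_DEFAULTS = {"yarn_attn_factor": 1.0, "yarn_beta_fast": 32.0, "yarn_beta_slow": 1.0}

def resolve_yarn(param: str, cli_value: float, gguf_value: float | None = None) -> float:
    if cli_value >= 0.0:           # user passed an explicit value on the command line
        return cli_value
    if gguf_value is not None:     # value stored in the model's GGUF metadata
        return gguf_value
    return LEGACY_DEFAULTS[param]  # fall back to the old default

print(resolve_yarn("yarn_beta_fast", -1.0))        # 32.0 -> unchanged behaviour without metadata
print(resolve_yarn("yarn_beta_fast", -1.0, 8.0))   # 8.0  -> model-provided value is used
print(resolve_yarn("yarn_beta_fast", 16.0, 8.0))   # 16.0 -> explicit CLI value still wins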

convert_hf_to_gguf.py

Lines changed: 78 additions & 23 deletions
@@ -735,6 +735,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
             # ref: https://huggingface.co/Qwen/Qwen3-Embedding-0.6B
             res = "qwen2"
+        if chkhsh == "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273":
+            # ref: https://huggingface.co/alvarobartt/grok-2-tokenizer
+            res = "grok-2"
         if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-bpe"
@@ -2682,57 +2685,109 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
             yield (new_name, data_torch)
 
 
-@ModelBase.register("GrokForCausalLM")
+@ModelBase.register("GrokForCausalLM", "Grok1ForCausalLM")
 class GrokModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GROK
 
     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        if (self.dir_model / 'tokenizer.model').is_file():
+            self._set_vocab_sentencepiece()
+            return
+
+        if not (self.dir_model / 'tokenizer.json').is_file() or not (self.dir_model / 'chat_template.jinja').is_file():
+            logger.error('Error: Missing vocab and chat template, download files from https://huggingface.co/alvarobartt/grok-2-tokenizer')
+            sys.exit(1)
+
+        self._set_vocab_gpt2()
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
 
-    _experts: list[dict[str, Tensor]] | None = None
+        self.gguf_writer.add_attn_logit_softcapping(self.hparams.get("attn_logit_softcapping", 30.0))
+        self.gguf_writer.add_router_logit_softcapping(self.hparams.get("router_logit_softcapping", 30.0))
+        if (final_logit_softcap := self.hparams.get("final_logit_softcapping")):
+            self.gguf_writer.add_final_logit_softcapping(final_logit_softcap)
+
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+
+        # Treat "original" as "yarn", seems to have been a mistake
+        if self.hparams.get("rope_type") in ("yarn", "original"):
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(self.hparams["scaling_factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_scaling_yarn_ext_factor(self.hparams["extrapolation_factor"])
+            self.gguf_writer.add_rope_scaling_yarn_attn_factor(self.hparams["attn_factor"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_fast(self.hparams["beta_fast"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_slow(self.hparams["beta_slow"])
+
+        if temp_len := self.hparams.get("attn_temperature_len"):
+            self.gguf_writer.add_attn_temperature_length(temp_len)
+
+        self.gguf_writer.add_attn_output_scale(self.hparams.get("attn_output_multiplier", rope_dim**-0.5))
+        self.gguf_writer.add_embedding_scale(self.hparams["embedding_multiplier_scale"])
+        self.gguf_writer.add_logit_scale(self.hparams["output_multiplier_scale"])
+
+    _experts: list[dict[str, list[Tensor]]] | None = None
+    _cur_expert = ""
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        tensors: list[tuple[str, Tensor]] = []
+        is_expert = ".moe." in name or ".block_sparse_moe.experts." in name
+
+        if not is_expert:
+            tensors.append((self.map_tensor_name(name), data_torch))
+
         # process the experts separately
-        if name.find(".moe.") != -1:
+        if is_expert or self._cur_expert:
             n_experts = self.hparams["num_local_experts"]
 
             assert bid is not None
 
             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]
 
-            self._experts[bid][name] = data_torch
+            # concatenate split tensors
+            if name in self._experts[bid]:
+                self._cur_expert = name
+                self._experts[bid][name].append(data_torch)
+                return []
+            elif is_expert:
+                self._cur_expert = name
+                self._experts[bid][name] = [data_torch]
+                return []
+            else:
+                self._cur_expert = ""
 
-            if len(self._experts[bid]) >= n_experts * 3:
-                tensors: list[tuple[str, Tensor]] = []
+            for bid in range(self.block_count):
+                if len(self._experts[bid]) >= n_experts * 3:
+                    # merge the experts into a single 3d tensor
+                    for wid in [("linear", "w1", 0), ("linear_1", "w2", 1), ("linear_v", "w3", 0)]:
+                        datas: list[Tensor] = []
 
-                # merge the experts into a single 3d tensor
-                for wid in ["linear", "linear_1", "linear_v"]:
-                    datas: list[Tensor] = []
+                        for xid in range(n_experts):
+                            ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid[0]}.weight"
+                            if ename not in self._experts[bid]:
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{wid[1]}.weight"
+                            tensor_list = self._experts[bid][ename]
+                            datas.append(torch.cat(tensor_list, dim=wid[2]) if len(tensor_list) > 1 else tensor_list[0])
+                            del self._experts[bid][ename]
 
-                    for xid in range(n_experts):
-                        ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid}.weight"
-                        datas.append(self._experts[bid][ename])
-                        del self._experts[bid][ename]
+                        data_torch = torch.stack(datas, dim=0)
 
-                    data_torch = torch.stack(datas, dim=0)
+                        merged_name = f"transformer.decoder_layer.{bid}.moe.{wid[0]}.weight"
 
-                    merged_name = f"transformer.decoder_layer.{bid}.moe.{wid}.weight"
-
-                    new_name = self.map_tensor_name(merged_name)
+                        new_name = self.map_tensor_name(merged_name)
 
-                    tensors.append((new_name, data_torch))
-                return tensors
-            else:
-                return []
+                        yield (new_name, data_torch)
 
-        return [(self.map_tensor_name(name), data_torch)]
+        yield from tensors
 
 
 @ModelBase.register("DbrxForCausalLM")
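The rewritten modify_tensors above merges experts in two steps: shards of the same expert tensor are first re-joined with torch.cat along the split dimension, and the per-expert results are then stacked into a single 3-D tensor with torch.stack. A self-contained sketch of that two-step merge, using made-up shapes rather than the converter's real tensor names:

import torch

# Illustrative shapes: 4 experts, each weight split into 2 shards along dim 0.
n_experts, shard_rows, cols = 4, 3, 8
shards = {e: [torch.randn(shard_rows, cols) for _ in range(2)] for e in range(n_experts)}

# 1) Re-join the shards of each expert along the split dimension,
#    mirroring torch.cat(tensor_list, dim=wid[2]) in the diff.
experts = [torch.cat(shards[e], dim=0) for e in range(n_experts)]

# 2) Stack all experts into one 3-D tensor, one slice per expert,
#    as torch.stack(datas, dim=0) does for the merged GGUF tensor.
merged = torch.stack(experts, dim=0)
print(merged.shape)  # torch.Size([4, 6, 8])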

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -158,6 +158,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"},
     {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"},
     {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
+    {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"},
 ]
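The chkhsh in the new entry is a fingerprint of the pre-tokenizer's behaviour: roughly, the update script tokenizes a fixed probe string and hashes the resulting token IDs, so convert_hf_to_gguf.py can map that digest to a known pre-tokenizer name such as "grok-2". A simplified sketch of the idea (assuming Hugging Face's AutoTokenizer; not the exact script code):

from hashlib import sha256
from transformers import AutoTokenizer  # assumption: the same library the update script drives

def pretokenizer_fingerprint(repo: str, probe_text: str) -> str:
    # Tokenize a fixed probe string and hash the token IDs; two tokenizers that
    # pre-tokenize identically produce the same digest.
    tok = AutoTokenizer.from_pretrained(repo)
    ids = tok.encode(probe_text)
    return sha256(str(ids).encode()).hexdigest()

# e.g. pretokenizer_fingerprint("alvarobartt/grok-2-tokenizer", probe_text) would be
# compared against "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"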