
Commit 17c28b3

Merge pull request #282 from menloresearch/update-dev-from-master-2025-10-08-00-32
Sync master with upstream release b6710
2 parents 66ba518 + 74b8fc1 commit 17c28b3

File tree

21 files changed: +969 -267 lines

.github/workflows/build.yml

Lines changed: 26 additions & 8 deletions
@@ -444,8 +444,8 @@ jobs:
         # This is using llvmpipe and runs slower than other backends
         ctest -L main --verbose --timeout 4200
 
-  ubuntu-22-cmake-webgpu:
-    runs-on: ubuntu-22.04
+  ubuntu-24-cmake-webgpu:
+    runs-on: ubuntu-24.04
 
     steps:
       - name: Clone
@@ -455,16 +455,34 @@
       - name: ccache
         uses: ggml-org/[email protected]
         with:
-          key: ubuntu-22-cmake-webgpu
+          key: ubuntu-24-cmake-webgpu
           evict-old-files: 1d
 
-      - name: Vulkan SDK Dependencies
-        id: vulkan-depends
+      - name: Dependencies
+        id: depends
        run: |
-          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
-          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo add-apt-repository -y ppa:kisak/kisak-mesa
           sudo apt-get update -y
-          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
+          sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libcurl4-openssl-dev
+
+      - name: Get latest Vulkan SDK version
+        id: vulkan_sdk_version
+        run: |
+          echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
+
+      - name: Use Vulkan SDK Cache
+        uses: actions/cache@v4
+        id: cache-sdk
+        with:
+          path: ./vulkan_sdk
+          key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
+
+      - name: Setup Vulkan SDK
+        if: steps.cache-sdk.outputs.cache-hit != 'true'
+        uses: ./.github/actions/linux-setup-vulkan
+        with:
+          path: ./vulkan_sdk
+          version: ${{ env.VULKAN_SDK_VERSION }}
 
       - name: Dawn Dependency
         id: dawn-depends

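The reworked WebGPU job pins the Vulkan SDK cache to the version reported by LunarG's latest-release endpoint, so the cached ./vulkan_sdk directory is rebuilt only when a new SDK ships. A rough Python sketch of that version/cache-key logic (the URL and the key format come from the workflow above; the helper function and the literal "Linux" runner value are illustrative only):

import urllib.request

def latest_vulkan_sdk_version() -> str:
    # LunarG's endpoint returns a bare version string for the latest Linux SDK
    with urllib.request.urlopen("https://vulkan.lunarg.com/sdk/latest/linux.txt") as resp:
        return resp.read().decode().strip()

version = latest_vulkan_sdk_version()
# actions/cache key as built in the workflow: vulkan-sdk-<version>-<runner.os>
cache_key = f"vulkan-sdk-{version}-Linux"
print(cache_key)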
CODEOWNERS

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@
 /ggml/src/ggml-rpc/ @rgerganov
 /ggml/src/ggml-threading.* @ggerganov @slaren
 /ggml/src/ggml-vulkan/ @0cc4m
+/ggml/src/ggml-webgpu/ @reeselevine
 /ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
 /ggml/src/ggml.c @ggerganov @slaren
 /ggml/src/ggml.cpp @ggerganov @slaren

convert_hf_to_gguf.py

Lines changed: 69 additions & 0 deletions
@@ -8836,6 +8836,75 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("Lfm2MoeForCausalLM")
+class LFM2MoeModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LFM2MOE
+
+    def set_gguf_parameters(self):
+        # set num_key_value_heads only for attention layers
+        self.hparams["num_key_value_heads"] = [
+            self.hparams["num_key_value_heads"] if layer_type == "full_attention" else 0
+            for layer_type in self.hparams["layer_types"]
+        ]
+
+        super().set_gguf_parameters()
+
+        self.gguf_writer.add_expert_count(self.hparams["num_experts"])
+        self.gguf_writer.add_expert_feed_forward_length(self.hparams["moe_intermediate_size"])
+        self.gguf_writer.add_leading_dense_block_count(self.hparams["num_dense_layers"])
+        self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+
+        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+        self.gguf_writer.add_shortconv_l_cache(self.hparams["conv_L_cache"])
+
+    # cache for experts weights for merging
+    _experts_cache: dict[int, dict[str, Tensor]] = {}
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # conv op requires 2d tensor
+        if 'conv.conv' in name:
+            data_torch = data_torch.squeeze(1)
+
+        if name.endswith(".expert_bias"):
+            name = name.replace(".expert_bias", ".expert_bias.bias")
+
+        # merge expert weights
+        if 'experts' in name:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            expert_cache = self._experts_cache.setdefault(bid, {})
+            expert_cache[name] = data_torch
+            expert_weights = ["w1", "w2", "w3"]
+
+            # not enough expert weights to merge
+            if len(expert_cache) < n_experts * len(expert_weights):
+                return []
+
+            tensors: list[tuple[str, Tensor]] = []
+            for w_name in expert_weights:
+                datas: list[Tensor] = []
+
+                for xid in range(n_experts):
+                    ename = f"model.layers.{bid}.feed_forward.experts.{xid}.{w_name}.weight"
+                    datas.append(expert_cache[ename])
+                    del expert_cache[ename]
+
+                data_torch = torch.stack(datas, dim=0)
+                merged_name = f"layers.{bid}.feed_forward.experts.{w_name}.weight"
+                new_name = self.map_tensor_name(merged_name)
+                tensors.append((new_name, data_torch))
+
+            del self._experts_cache[bid]
+            return tensors
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+        assert not self._experts_cache
+
+
 @ModelBase.register("Lfm2VlForConditionalGeneration")
 class LFM2VLModel(MmprojModel):
     def __init__(self, *args, **kwargs):

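The new LFM2MoeModel.modify_tensors buffers each layer's per-expert tensors and, once all of them have arrived, stacks the experts' w1/w2/w3 matrices along a new leading dimension so each projection becomes a single 3-D tensor in the GGUF output. A toy, self-contained sketch of that stacking step (the shapes and expert count are made up; only the tensor naming and the torch.stack(..., dim=0) pattern mirror the converter code above):

import torch

n_experts, n_ff, n_embd = 4, 8, 6  # toy sizes, not the real LFM2-MoE dimensions

# per-expert w1 weights as they would appear in the HF checkpoint for layer 0
cache = {
    f"model.layers.0.feed_forward.experts.{xid}.w1.weight": torch.randn(n_ff, n_embd)
    for xid in range(n_experts)
}

# stack experts 0..N-1 along a new leading dim, as modify_tensors() does per projection
merged = torch.stack(
    [cache[f"model.layers.0.feed_forward.experts.{xid}.w1.weight"] for xid in range(n_experts)],
    dim=0,
)
assert merged.shape == (n_experts, n_ff, n_embd)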
ggml/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
@@ -222,6 +222,9 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
 option(GGML_WEBGPU "ggml: use WebGPU" OFF)
 option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
+option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
+option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
+
 option(GGML_ZDNN "ggml: use zDNN" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
 option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -50,5 +50,13 @@ if (GGML_WEBGPU_DEBUG)
     target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
 endif()
 
+if (GGML_WEBGPU_CPU_PROFILE)
+    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_CPU_PROFILE=1)
+endif()
+
+if (GGML_WEBGPU_GPU_PROFILE)
+    target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_GPU_PROFILE=1)
+endif()
+
 target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
 target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
