Skip to content

Commit 9a50547

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents cfd3150 + bd0af02 commit 9a50547

File tree

18 files changed

+563
-299
lines changed

.devops/musa.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
33
ARG MUSA_VERSION=rc4.3.0
44
# Target the MUSA build image
5-
ARG BASE_MUSA_DEV_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
5+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66

7-
ARG BASE_MUSA_RUN_CONTAINER=sh-harbor.mthreads.com/haive/mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
7+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
88

99
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
1010

.github/workflows/docker.yml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
push_to_registry:
2929
name: Push Docker image to Docker Hub
3030

31-
runs-on: ubuntu-22.04
31+
runs-on: ${{ matrix.config.runs_on }}
3232
env:
3333
COMMIT_SHA: ${{ github.sha }}
3434
strategy:
@@ -39,12 +39,12 @@ jobs:
3939
# Note: the arm64 images are failing, which prevents the amd64 images from being built
4040
# https://github.com/ggml-org/llama.cpp/issues/11888
4141
#- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
42-
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
43-
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
44-
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
45-
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
46-
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
47-
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false }
42+
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
43+
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
44+
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
45+
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
46+
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
47+
- { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" }
4848
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
4949
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }
5050
steps:
@@ -54,6 +54,7 @@ jobs:
5454
fetch-depth: 0 # preserve git history, so we can determine the build number
5555

5656
- name: Set up QEMU
57+
if: ${{ matrix.config.tag != 's390x' }}
5758
uses: docker/setup-qemu-action@v3
5859
with:
5960
image: tonistiigi/binfmt:qemu-v7.0.0-28

common/chat.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,17 +1616,36 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
16161616
);
16171617
});
16181618

1619-
auto recipient_in_role = builder.add_rule("recipient_in_role",
1620-
"\"<|start|>assistant\"? \" to=functions.\" ( " +
1621-
string_join(tool_rules_recipient_in_role, " | ") + " )"
1622-
);
1623-
16241619
auto recipient_in_channel = builder.add_rule("recipient_in_channel",
16251620
channel + " \" to=functions.\" ( " +
16261621
string_join(tool_rules_recipient_in_channel, " | ") + " )"
16271622
);
16281623

1629-
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1624+
if (data.grammar_lazy) {
1625+
auto recipient_in_role = builder.add_rule("recipient_in_role",
1626+
"\"<|start|>assistant\"? \" to=functions.\" ( " +
1627+
string_join(tool_rules_recipient_in_role, " | ") + " )"
1628+
);
1629+
1630+
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1631+
} else {
1632+
auto not_end = builder.add_rule("not-end",
1633+
"[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
1634+
auto analysis = builder.add_rule("analysis",
1635+
"\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1636+
auto commentary = builder.add_rule("commentary",
1637+
"\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1638+
1639+
auto recipient_in_role = builder.add_rule("recipient_in_role",
1640+
"\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
1641+
);
1642+
1643+
builder.add_rule("root",
1644+
"( " + analysis + " \"<|start|>assistant\" )? " +
1645+
"( " + commentary + " \"<|start|>assistant\" )? " +
1646+
"( " + recipient_in_role + " | " + recipient_in_channel + " )"
1647+
);
1648+
}
16301649

16311650
// Trigger on tool calls that appear in the commentary channel
16321651
data.grammar_triggers.push_back({

ggml/src/ggml-metal/ggml-metal-context.m

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,13 +567,13 @@ void ggml_metal_set_n_cb(ggml_metal_t ctx, int n_cb) {
567567
ctx->debug_graph,
568568
ctx->debug_fusion);
569569

570-
for (int idx = idx_start; idx < idx_end;) {
570+
for (int idx = 0; idx < ggml_metal_op_n_nodes(ctx_op); ++idx) {
571571
const int res = ggml_metal_op_encode(ctx_op, idx);
572572
if (res == 0) {
573573
break;
574574
}
575575

576-
idx += res;
576+
idx += res - 1;
577577
}
578578

579579
ggml_metal_op_free(ctx_op);

ggml/src/ggml-metal/ggml-metal-device.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -438,21 +438,35 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv_ext(ggml_metal_libr
438438
return res;
439439
}
440440

441-
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm(ggml_metal_library_t lib, ggml_type tsrc0, ggml_type tsrc1) {
441+
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm(ggml_metal_library_t lib, const ggml_tensor * op) {
442442
char base[256];
443443
char name[256];
444444

445+
const ggml_type tsrc0 = op->src[0]->type;
446+
const ggml_type tsrc1 = op->src[1]->type;
447+
448+
const bool bc_inp = op->src[0]->ne[0] % 32 != 0;
449+
const bool bc_out = op->ne[0] % 64 != 0 || op->ne[1] % 32 != 0;
450+
445451
snprintf(base, 256, "kernel_mul_mm_%s_%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1));
446-
snprintf(name, 256, "%s", base);
452+
snprintf(name, 256, "%s_bci=%d_bco=%d", base, bc_inp, bc_out);
447453

448454
ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
449455
if (res) {
450456
return res;
451457
}
452458

453-
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
459+
ggml_metal_cv_t cv = ggml_metal_cv_init();
454460

455-
ggml_metal_pipeline_set_smem(res, 8192);
461+
ggml_metal_cv_set_bool(cv, bc_inp, FC_MUL_MM + 0);
462+
ggml_metal_cv_set_bool(cv, bc_out, FC_MUL_MM + 1);
463+
464+
res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
465+
466+
ggml_metal_cv_free(cv);
467+
468+
// when the output size is not multiple of 64x32, we need extra smem to prevent out-of-bounds writes
469+
ggml_metal_pipeline_set_smem(res, bc_out ? 8192 : 4096 + 2048);
456470

457471
return res;
458472
}
@@ -659,19 +673,30 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id_map0(ggml_metal_
659673
return res;
660674
}
661675

662-
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id(ggml_metal_library_t lib, ggml_type tsrc0, ggml_type tsrc1) {
676+
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id(ggml_metal_library_t lib, const ggml_tensor * op) {
663677
char base[256];
664678
char name[256];
665679

680+
const ggml_type tsrc0 = op->src[0]->type;
681+
const ggml_type tsrc1 = op->src[1]->type;
682+
683+
const bool bc_inp = op->src[0]->ne[0] % 32 != 0;
684+
666685
snprintf(base, 256, "kernel_mul_mm_id_%s_%s", ggml_type_name(tsrc0), ggml_type_name(tsrc1));
667-
snprintf(name, 256, "%s", base);
686+
snprintf(name, 256, "%s_bci=%d", base, bc_inp);
668687

669688
ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
670689
if (res) {
671690
return res;
672691
}
673692

674-
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
693+
ggml_metal_cv_t cv = ggml_metal_cv_init();
694+
695+
ggml_metal_cv_set_bool(cv, bc_inp, FC_MUL_MM + 0);
696+
697+
res = ggml_metal_library_compile_pipeline(lib, base, name, cv);
698+
699+
ggml_metal_cv_free(cv);
675700

676701
ggml_metal_pipeline_set_smem(res, 8192);
677702

ggml/src/ggml-metal/ggml-metal-device.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,10 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_ssm_conv (ggml_me
115115
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_ssm_scan (ggml_metal_library_t lib, const struct ggml_tensor * op);
116116
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_rwkv (ggml_metal_library_t lib, const struct ggml_tensor * op);
117117
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv_ext (ggml_metal_library_t lib, enum ggml_type tsrc0, enum ggml_type tsrc1, int nsg, int nxpsg, int r1ptg);
118-
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm (ggml_metal_library_t lib, enum ggml_type tsrc0, enum ggml_type tsrc1);
118+
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm (ggml_metal_library_t lib, const struct ggml_tensor * op);
119119
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv (ggml_metal_library_t lib, const struct ggml_tensor * op);
120120
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id_map0 (ggml_metal_library_t lib, int ne02, int ne20);
121-
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id (ggml_metal_library_t lib, enum ggml_type tsrc0, enum ggml_type tsrc1);
121+
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mm_id (ggml_metal_library_t lib, const struct ggml_tensor * op);
122122
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_mul_mv_id (ggml_metal_library_t lib, const struct ggml_tensor * op);
123123
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_argmax (ggml_metal_library_t lib, const struct ggml_tensor * op);
124124
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_argsort (ggml_metal_library_t lib, const struct ggml_tensor * op);

ggml/src/ggml-metal/ggml-metal-device.m

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,8 +717,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
717717
return true;
718718
case GGML_OP_MUL_MAT:
719719
case GGML_OP_MUL_MAT_ID:
720-
return has_simdgroup_reduction &&
721-
(op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type == GGML_TYPE_F32);
720+
return has_simdgroup_reduction;
722721
case GGML_OP_CPY:
723722
case GGML_OP_DUP:
724723
case GGML_OP_CONT:

ggml/src/ggml-metal/ggml-metal-impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#define FC_FLASH_ATTN_EXT_VEC 200
7777
#define FC_FLASH_ATTN_EXT_VEC_REDUCE 300
7878
#define FC_MUL_MV 400
79+
#define FC_MUL_MM 500
7980

8081
// kernel argument structs
8182
//

0 commit comments

Comments
 (0)