
Commit 1871c20

Merge pull request #108 from ggml-org/master
Sync from upstream
2 parents a57cceb + 0f5ccd6 commit 1871c20

28 files changed, +4879 -212 lines

.devops/cann.Dockerfile

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+# ==============================================================================
+# ARGUMENTS
+# ==============================================================================
+
+# Define the CANN base image for easier version updates later
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
+
+# ==============================================================================
+# BUILD STAGE
+# Compile all binary files and libraries
+# ==============================================================================
+FROM ${CANN_BASE_IMAGE} AS build
+
+# Define the Ascend chip model for compilation. Default is Ascend910B3
+ARG ASCEND_SOC_TYPE=Ascend910B3
+
+# -- Install build dependencies --
+RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# -- Set the working directory --
+WORKDIR /app
+
+# -- Copy project files --
+COPY . .
+
+# -- Set CANN environment variables (required for compilation) --
+# Using ENV instead of `source` allows environment variables to persist across the entire image layer
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+# ... You can add other environment variables from the original file as needed ...
+# For brevity, only core variables are listed here. You can paste the original ENV list here.
+
+# -- Build llama.cpp --
+# Use the passed ASCEND_SOC_TYPE argument and add general build options
+RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
+    && \
+    cmake -B build \
+        -DGGML_CANN=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DSOC_TYPE=${ASCEND_SOC_TYPE} \
+        . && \
+    cmake --build build --config Release -j$(nproc)
+
+# -- Organize build artifacts for copying in later stages --
+# Create a lib directory to store all .so files
+RUN mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
+
+# Create a full directory to store all executables and Python scripts
+RUN mkdir -p /app/full && \
+    cp build/bin/* /app/full/ && \
+    cp *.py /app/full/ && \
+    cp -r gguf-py /app/full/ && \
+    cp -r requirements /app/full/ && \
+    cp requirements.txt /app/full/
+# If you have a tools.sh script, make sure it is copied here
+# cp .devops/tools.sh /app/full/tools.sh
+
+# ==============================================================================
+# BASE STAGE
+# Create a minimal base image with CANN runtime and common libraries
+# ==============================================================================
+FROM ${CANN_BASE_IMAGE} AS base
+
+# -- Install runtime dependencies --
+RUN yum install -y libgomp curl && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# -- Set CANN environment variables (required for runtime) --
+ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
+ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
+ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+# ... You can add other environment variables from the original file as needed ...
+
+WORKDIR /app
+
+# Copy compiled .so files from the build stage
+COPY --from=build /app/lib/ /app
+
+# ==============================================================================
+# FINAL STAGES (TARGETS)
+# ==============================================================================
+
+### Target: full
+# Complete image with all tools, Python bindings, and dependencies
+# ==============================================================================
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+# Install Python dependencies
+RUN yum install -y git python3 python3-pip && \
+    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip3 install --no-cache-dir -r requirements.txt && \
+    yum clean all && \
+    rm -rf /var/cache/yum
+
+# You need to provide a tools.sh script as the entrypoint
+ENTRYPOINT ["/app/tools.sh"]
+# If there is no tools.sh, you can set the default to start the server
+# ENTRYPOINT ["/app/llama-server"]
+
+### Target: light
+# Lightweight image containing only llama-cli
+# ==============================================================================
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Target: server
+# Dedicated server image containing only llama-server
+# ==============================================================================
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]

common/arg.cpp

Lines changed: 9 additions & 0 deletions
@@ -2380,6 +2380,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ));
+    add_opt(common_arg(
+        {"--cpu-moe"},
+        "use CPU for Mixture of Experts (MoE) weights",
+        [](common_params & params) {
+            params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$",   ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+        }
+    ).set_env("LLAMA_ARG_CPU_MOE"));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
         "number of layers to store in VRAM",

convert_hf_to_gguf.py

Lines changed: 95 additions & 8 deletions
@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664":
             # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct
             res = "hunyuan"
+        if chkhsh == "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6":
+            # ref: https://huggingface.co/tencent/Hunyuan-4B-Instruct
+            res = "hunyuan-dense"
         if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6":
             # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base
             res = "falcon-h1"
@@ -7553,11 +7556,6 @@ def set_gguf_parameters(self):
 class HunYuanMoEModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # For handling tied embeddings
-        self._tok_embd = None
-
     def set_vocab(self):
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
@@ -7651,9 +7649,6 @@ def set_gguf_parameters(self):
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        if name == "model.embed_tokens.weight":
-            self._tok_embd = data_torch.clone()
-
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
@@ -7698,6 +7693,98 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("HunYuanDenseV1ForCausalLM")
+class HunYuanModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE
+
+    def set_vocab(self):
+        if (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+            # 1. Get the pre-tokenizer identifier hash
+            tokpre = self.get_vocab_base_pre(tokenizer)
+
+            # 2. Reverse-engineer the merges list from mergeable_ranks
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.mergeable_ranks
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # 3. Generate the tokens and toktypes lists
+            vocab_size = self.hparams["vocab_size"]
+            assert tokenizer.vocab_size == vocab_size
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            # 4. Write all vocab-related fields to the GGUF writer
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            # 5. Add special tokens and chat templates
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        # FIX for BOS token: Overwrite incorrect id read from config.json
+        if self.hparams['hidden_size'] == 4096:
+            self.gguf_writer.add_bos_token_id(127958)  # only for 7b dense, fix <|bos|> token
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # Rope
+        rope_scaling = hparams.get("rope_scaling", {})
+        if rope_scaling.get("type") == "dynamic":
+            # HunYuan uses NTK Aware Alpha based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+            # 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf)
+            alpha = rope_scaling.get("alpha", 50)
+            base = hparams.get("rope_theta", 10000.0)
+            dim = hparams["head_dim"]
+            scaled_base = base * (alpha ** (dim / (dim - 2)))
+            self.gguf_writer.add_rope_freq_base(scaled_base)
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+            self.gguf_writer.add_rope_scaling_factor(1)
+            # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024)  # 256k context length
+            self.gguf_writer.add_context_length(256 * 1024)  # 256k context length
+
+            # if any of our assumptions about the values are wrong, something has changed and this may need to be updated
+            assert base == 10000.0 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024], \
+                "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually"
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+
 @ModelBase.register("SmolLM3ForCausalLM")
 class SmolLM3Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.SMOLLM3
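Note: the new HunYuanModel converter folds HunYuan's NTK-aware alpha scaling directly into the RoPE frequency base, which is why it writes rope_scaling_type = NONE and a fixed 256k context instead of a runtime scaling factor. A small sketch of the arithmetic with illustrative numbers (alpha = 1000 and head_dim = 128 are assumptions for the example, not values read from a particular config):

# Sketch of the NTK-aware alpha scaling performed in set_gguf_parameters above.
# alpha and dim are illustrative; the real values come from the model's config.json.
alpha = 1000        # rope_scaling.alpha (the comment above ties 1000 to ~256k usable context)
base = 10000.0      # rope_theta
dim = 128           # head_dim

scaled_base = base * (alpha ** (dim / (dim - 2)))   # ~1.1e7 for these example values
print(f"rope_freq_base written to GGUF: {scaled_base:.3e}")

Because the long-context behaviour is baked into the scaled frequency base, no additional RoPE scaling is applied at inference time.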

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -140,6 +140,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
     {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
     {"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"},
+    {"name": "hunyuan-dense", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-4B-Instruct", "chkhsh": "bba3b3366b646dbdded5dbc42d59598b849371afc42f7beafa914afaa5b70aa6"},
     # falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"},
     {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"},

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 3 additions & 0 deletions
@@ -2016,6 +2016,9 @@ static bool ggml_backend_cann_cpy_tensor_async(
         (ggml_backend_cann_context*)backend_dst->context;
 
     size_t copy_size = ggml_nbytes(dst);
+    if (copy_size == 0) {
+        return true;
+    }
     if (backend_src != backend_dst) {
         ggml_backend_cann_buffer_context* buf_ctx_src =
             (ggml_backend_cann_buffer_context*)buf_src->context;

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 14 additions & 0 deletions
@@ -37,17 +37,21 @@
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
 // repack.cpp
 #define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #elif defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
 // repack.cpp
 #define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
@@ -72,11 +76,13 @@
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #elif defined(__loongarch64)
 // quants.c
@@ -92,11 +98,13 @@
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #elif defined(__riscv)
 // quants.c
@@ -119,10 +127,12 @@
 #define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #elif defined(__s390x__)
 // quants.c
@@ -147,11 +157,13 @@
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #elif defined(__wasm__)
 // quants.c
@@ -175,10 +187,12 @@
 #define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0
 #define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0
 #define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K
+#define ggml_gemv_q2_K_8x8_q8_K_generic ggml_gemv_q2_K_8x8_q8_K
 #define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0
 #define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0
 #define ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0
 #define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0
 #define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K
+#define ggml_gemm_q2_K_8x8_q8_K_generic ggml_gemm_q2_K_8x8_q8_K
 #define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0
 #endif
