Commit 49193e2

add conv layer

1 parent d44c721

3 files changed: 25 additions (+), 4 deletions (-)


convert_hf_to_gguf.py (2 additions & 1 deletion)

```diff
@@ -5325,7 +5325,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         name = name.replace(".final_layer_norm", ".post_attention_layernorm")

         if "conv1.bias" in name or "conv2.bias" in name:
-            data_torch = data_torch.unsqueeze(-1).transpose(0, 1)
+            # transpose conv1 and conv2 bias
+            data_torch = data_torch.unsqueeze(-1)

         return [(self.map_tensor_name(name), data_torch)]

```

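Why this works: `unsqueeze(-1)` turns the 1-D bias of torch shape `(n_embd,)` into a column of shape `(n_embd, 1)`, and GGUF records dimensions in the reverse of PyTorch order, so that tensor lands in ggml as `{1, n_embd}`, the shape the loader now declares in src/llama-model.cpp below, which broadcasts across the time axis when added to the conv output. A minimal sketch of the shape change (the value `n_embd = 1280` is an illustrative assumption, not read from any model):

```python
import torch

n_embd = 1280               # hypothetical encoder width, for illustration only
bias = torch.zeros(n_embd)  # conv bias as exported by HF: shape (n_embd,)

# old conversion: unsqueeze then transpose -> torch shape (1, n_embd)
old = bias.unsqueeze(-1).transpose(0, 1)
assert old.shape == (1, n_embd)

# new conversion: unsqueeze only -> torch shape (n_embd, 1)
new = bias.unsqueeze(-1)
assert new.shape == (n_embd, 1)

# GGUF stores dims in reverse order, so torch (n_embd, 1) becomes ggml {1, n_embd},
# matching the updated create_tensor(..., {1, n_embd}, ...) declarations below
```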
examples/asr/asr-ultravox.cpp (2 additions & 1 deletion)

```diff
@@ -144,7 +144,7 @@ int main(int argc, char ** argv) {
     {
         int n_ctx = llama_model_n_ctx_train(enc_model);
         int n_embd = llama_model_n_embd(enc_model);
-        std::vector<float> embd(n_ctx * n_embd, 0.0f);
+        std::vector<float> embd(2*n_ctx * mel.n_mel, 0.0f);
         // set the input
         {
             int mel_offset = 0;
@@ -160,6 +160,7 @@ int main(int argc, char ** argv) {
         }

         // set the input
+        GGML_ASSERT((int)embd.size() < 2*n_ctx * n_embd);
         llama_batch batch_embd = llama_batch_init(n_ctx, n_embd, 1);
         batch_embd.n_tokens = n_ctx;
         for (int i = 0; i < batch_embd.n_tokens; i++) {
```

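The buffer now carries raw mel frames (2*n_ctx frames of mel.n_mel bins each) rather than n_ctx precomputed embeddings, since the encoder graph now runs the conv front-end itself; the new GGML_ASSERT bounds that mel payload against the embedding capacity the batch can absorb. A rough sanity check of the arithmetic, using Whisper-style placeholder values (n_ctx = 1500, n_mel = 128, n_embd = 1280 are assumptions, not values read from a model):

```python
# hypothetical Whisper-large-style dimensions; real values come from the model
n_ctx = 1500    # encoder training context (output frames)
n_mel = 128     # mel bins per frame
n_embd = 1280   # encoder embedding width

mel_floats = 2 * n_ctx * n_mel    # new embd buffer: 384,000 floats of mel data
embd_floats = n_ctx * n_embd      # old buffer size: 1,920,000 floats

# mirrors GGML_ASSERT((int)embd.size() < 2*n_ctx * n_embd)
assert mel_floats < 2 * n_ctx * n_embd
```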
src/llama-model.cpp (21 additions & 2 deletions)

```diff
@@ -3836,9 +3836,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {

             // conv1d
             conv1d_1_w = create_tensor(tn(LLM_TENSOR_WHISPER_CONV1, "weight"), {3, 128, n_embd}, 0);
-            conv1d_1_b = create_tensor(tn(LLM_TENSOR_WHISPER_CONV1, "bias" ), {n_embd}, 0);
+            conv1d_1_b = create_tensor(tn(LLM_TENSOR_WHISPER_CONV1, "bias" ), {1, n_embd}, 0);
             conv1d_2_w = create_tensor(tn(LLM_TENSOR_WHISPER_CONV2, "weight"), {3, n_embd, n_embd}, 0);
-            conv1d_2_b = create_tensor(tn(LLM_TENSOR_WHISPER_CONV2, "bias" ), {n_embd}, 0);
+            conv1d_2_b = create_tensor(tn(LLM_TENSOR_WHISPER_CONV2, "bias" ), {1, n_embd}, 0);

             // mm projector
             // https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b/blob/main/ultravox_model.py#L553
@@ -12132,6 +12132,25 @@ struct llm_build_ultravox_enc : public llm_graph_context {
        }
        res->add_input(std::move(inp));

+       if (ubatch.embd) {
+           // reshape to [2*n_ctx, n_mel]
+           cur = ggml_view_2d(ctx0, inpL, 2*hparams.n_ctx_train, hparams.n_mel_bins,
+                   ggml_row_size(inpL->type, 2*hparams.n_ctx_train), 0);
+
+           // convolution + gelu
+           cur = ggml_conv_1d_ph(ctx0, model.conv1d_1_w, cur, 1, 1);
+           cur = ggml_add(ctx0, cur, model.conv1d_1_b);
+
+           cur = ggml_gelu(ctx0, cur);
+
+           cur = ggml_conv_1d_ph(ctx0, model.conv1d_2_w, cur, 2, 1);
+           cur = ggml_add(ctx0, cur, model.conv1d_2_b);
+
+           cur = ggml_gelu(ctx0, cur);
+           // transpose
+           inpL = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
+       }
+
        // add position embeddings
        ggml_tensor * pos_embd_selected = ggml_view_2d(ctx0, model.pos_embd,
                model.pos_embd->ne[0], ubatch.n_tokens,
```

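The new block builds a Whisper-style conv front-end into the graph. In ggml's {ne0, ne1} order the mel input is viewed as [2*n_ctx, n_mel]; ggml_conv_1d_ph applies half padding (kernel 3, pad 1), so the first conv (stride 1) keeps the time axis at 2*n_ctx while lifting the channel count to n_embd, the second conv (stride 2) halves time to n_ctx, and the final transpose leaves the result in the layout the position-embedding add expects. A PyTorch sketch of the same shape flow (the dimension values are illustrative assumptions):

```python
import torch
import torch.nn.functional as F

# hypothetical dimensions; the real values come from hparams
n_ctx, n_mel, n_embd = 1500, 128, 1280

mel = torch.zeros(1, n_mel, 2 * n_ctx)  # [batch, channels, time]
w1, b1 = torch.zeros(n_embd, n_mel, 3), torch.zeros(n_embd)
w2, b2 = torch.zeros(n_embd, n_embd, 3), torch.zeros(n_embd)

# conv1: kernel 3, stride 1, pad 1 -> time stays 2*n_ctx
x = F.gelu(F.conv1d(mel, w1, b1, stride=1, padding=1))  # [1, n_embd, 2*n_ctx]
# conv2: kernel 3, stride 2, pad 1 -> time halves to n_ctx
x = F.gelu(F.conv1d(x, w2, b2, stride=2, padding=1))    # [1, n_embd, n_ctx]
# transpose so each of the n_ctx positions carries an n_embd vector
x = x.transpose(1, 2)                                   # [1, n_ctx, n_embd]

assert x.shape == (1, n_ctx, n_embd)
```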