Update RobertaEmbedding for Vulkan compatibility: bake pad_index into the precomputed position indices and view them from offset 0.

Judd · Judd · commit 1dd8b6bc1f17 · 2025-03-13T18:33:37.000+08:00
diff --git a/src/layers.cpp b/src/layers.cpp
@@ -630,11 +630,7 @@ namespace chatllm
     {
         int qlen = (int)input->ne[0];
 
-        // quick fix for `before_initial_run`
-        if (n_past + pad_index + qlen > indices->ne[0])
-            n_past = (int)indices->ne[0] - qlen - pad_index;
-
-        ggml::tensor *idx = ggml::view_1d(ctx, indices, qlen, (n_past + pad_index) * ggml::element_size(indices));
+        ggml::tensor *idx = ggml::view_1d(ctx, indices, qlen, 0);
 
         ggml::tensor *output1 = ggml::get_rows(ctx, word_weight, input);
         ggml::tensor *output2 = ggml::get_rows(ctx, position_weight, idx);
diff --git a/src/layers.h b/src/layers.h
@@ -383,13 +383,13 @@ namespace chatllm
             : word_weight(ggml::new_tensor_2d(ctx, ctx->dtype, embedding_dim, num_embeddings)),
               position_weight(ggml::new_tensor_2d(ctx, ctx->dtype, embedding_dim, pos_max)),
               indices(ggml::new_tensor_1d(ctx, GGML_TYPE_I32, pos_max)),
-              ln(ctx, embedding_dim),
-              pad_index(2)
+              ln(ctx, embedding_dim)
         {
+            const int pad_index = 2;
             std::vector<int> v_indices;
             v_indices.resize(pos_max);
             for (int i = 0; i < pos_max; i++)
-                v_indices[i] = i;
+                v_indices[i] = pad_index + i;
 
             ctx->get_allocator()->alloc(indices);
             Backend::write_tensor_data(indices, v_indices.data());
@@ -411,7 +411,6 @@ namespace chatllm
         ggml::tensor *position_weight;
         ggml::tensor *indices;
         LayerNorm    ln;
-        int          pad_index;
     };
 
     class RMSNorm : public Block