|
2 | 2 | #include "ggml.h" |
3 | 3 | #include "ggml-cuda/rel-pos.cuh" |
4 | 4 |
|
5 | | -/* |
6 | | -
|
7 | | -static void ggml_compute_forward_get_rel_pos_f16( |
8 | | - const ggml_compute_params * params, |
9 | | - ggml_tensor * dst) { |
10 | | - GGML_UNUSED(params); |
11 | | -
|
12 | | - const ggml_tensor * src0 = dst->src[0]; |
13 | | -
|
14 | | - // ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322 |
15 | | -
|
16 | | - GGML_TENSOR_UNARY_OP_LOCALS |
17 | | -
|
18 | | - const int64_t kh = ne1; |
19 | | -
|
20 | | - ggml_fp16_t * src0_data = (ggml_fp16_t *) src0->data; |
21 | | - ggml_fp16_t * dst_data = (ggml_fp16_t *) dst->data; |
22 | | -
|
23 | | - for (int64_t i2 = 0; i2 < ne2; ++i2) { |
24 | | - for (int64_t i1 = 0; i1 < ne1; ++i1) { |
25 | | - const int64_t pos = (kh - i1 - 1) + i2; |
26 | | - for (int64_t i0 = 0; i0 < ne0; ++i0) { |
27 | | - dst_data[i2*ne1*ne0 + i1*ne0 + i0] = src0_data[pos*ne00 + i0]; |
28 | | - } |
29 | | - } |
30 | | - } |
31 | | -} |
32 | | -
|
33 | | -
|
34 | | -void ggml_compute_forward_get_rel_pos( |
35 | | - const ggml_compute_params * params, |
36 | | - ggml_tensor * dst) { |
37 | | -
|
38 | | - const ggml_tensor * src0 = dst->src[0]; |
39 | | -
|
40 | | - switch (src0->type) { |
41 | | - case GGML_TYPE_F32: |
42 | | - { |
43 | | - ggml_compute_forward_get_rel_pos_f32(params, dst); |
44 | | - } break; |
45 | | - case GGML_TYPE_F16: |
46 | | - case GGML_TYPE_BF16: |
47 | | - { |
48 | | - ggml_compute_forward_get_rel_pos_f16(params, dst); |
49 | | - } break; |
50 | | - default: |
51 | | - { |
52 | | - GGML_ABORT("fatal error"); |
53 | | - } |
54 | | - } |
55 | | -} |
56 | | -
|
57 | | -struct ggml_tensor * ggml_get_rel_pos( |
58 | | - struct ggml_context * ctx, |
59 | | - struct ggml_tensor * a, |
60 | | - int qh, |
61 | | - int kh) { |
62 | | - GGML_ASSERT(qh + kh - 1 <= a->ne[1]); |
63 | | -
|
64 | | - const int64_t ne[4] = { a->ne[0], kh, qh, 1, }; |
65 | | - struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 3, ne); |
66 | | -
|
67 | | - result->op = GGML_OP_GET_REL_POS; |
68 | | - result->src[0] = a; |
69 | | -
|
70 | | - return result; |
71 | | -} |
72 | | -
|
73 | | -*/ |
74 | 5 |
|
75 | 6 | template <typename T> |
76 | 7 | __global__ static void get_rel_pos_kernel(const void * src, void * dst, int C) { |
77 | 8 | int kh = gridDim.x; |
| 9 | + int qh = gridDim.x; |
| 10 | + float k_scale = MAX(qh / kh, 1.0f); |
| 11 | + float q_scale = MAX(kh / qh, 1.0f); |
78 | 12 | int ki = blockIdx.x; |
79 | 13 | int qi = blockIdx.y; |
80 | | - int pos = (kh - 1) + qi - ki; |
| 14 | + int pos = int(qi*q_scale - ki*k_scale + (kh - 1)*k_scale); |
81 | 15 |
|
82 | 16 | int s0 = C; |
83 | 17 | int s1 = C * kh; |
|
0 commit comments