Skip to content

Commit c722720

Browse files
committed
feat: Optimize rope operations with vectorization
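Uses `sycl::vec<T, 2>` to load and store two elements at a time in the pass-through branch (`i0 >= n_dims`) of `rope_norm`, `rope_neox`, and `rope_multi`, replacing the two scalar element copies with a single vector copy. This reduces the number of memory accesses on that path and is intended to significantly improve performance by leveraging vector (SIMD) load/store instructions.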
1 parent 229ea31 commit c722720

File tree

1 file changed

+3
-12
lines changed

1 file changed

+3
-12
lines changed

ggml/src/ggml-sycl/rope.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,7 @@ static void rope_norm(const T * x, T * dst, const int ne0, const int ne1, const
4949

5050
if (i0 >= n_dims) {
5151
const int i = row * ne0 + i0;
52-
53-
dst[i + 0] = x[i + 0];
54-
dst[i + 1] = x[i + 1];
55-
52+
*reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
5653
return;
5754
}
5855

@@ -93,10 +90,7 @@ static void rope_neox(const T * x, T * dst, const int ne0, const int ne1, const
9390

9491
if (i0 >= n_dims) {
9592
const int i = row * ne0 + i0;
96-
97-
dst[i + 0] = x[i + 0];
98-
dst[i + 1] = x[i + 1];
99-
93+
*reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
10094
return;
10195
}
10296

@@ -137,10 +131,7 @@ static void rope_multi(const T * x, T * dst, const int ne0, const int ne1, const
137131

138132
if (i0 >= n_dims) {
139133
const int i = row_dst*ne0 + i0;
140-
141-
dst[i + 0] = x[i + 0];
142-
dst[i + 1] = x[i + 1];
143-
134+
*reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
144135
return;
145136
}
146137

0 commit comments

Comments
 (0)