Skip to content

Commit eb5a27a

Browse files
author
Gitty Burstein
committed
ci: fix editorconfig issues (LF endings, trailing spaces, final newline)
1 parent ff6f378 commit eb5a27a

File tree

2 files changed

+78
-78
lines changed

2 files changed

+78
-78
lines changed

ggml/src/ggml-sycl/set.cpp

Lines changed: 73 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,73 @@
1-
#include "presets.hpp"
2-
#include "common.hpp"
3-
#include "ggml.h"
4-
#include "set.hpp"
5-
#include <cstdint>
6-
#include <sycl/sycl.hpp>
7-
using namespace sycl;
8-
9-
// Internal function: perform element-wise set operation for each thread
10-
inline void set_f32(const float* src, float* dst,
11-
const int64_t ne0, const int64_t ne1,
12-
const int64_t ne2, const int64_t ne3,
13-
const int64_t nb[3], const int64_t src_nb[3],
14-
const int64_t offset_elem,
15-
const nd_item<1>& item)
16-
{
17-
const size_t idx = item.get_global_id(0);
18-
const size_t total = ne0 * ne1 * ne2 * ne3;
19-
if (idx >= total) return;
20-
21-
// Convert linear index to 4D indices
22-
const size_t i3 = idx / (ne2 * ne1 * ne0);
23-
const size_t rem = idx % (ne2 * ne1 * ne0);
24-
const size_t i2 = rem / (ne1 * ne0);
25-
const size_t rem2 = rem % (ne1 * ne0);
26-
const size_t i1 = rem2 / ne0;
27-
const size_t i0 = rem2 % ne0;
28-
29-
// Compute source and destination indices and copy
30-
dst[i0 + i1*nb[0] + i2*nb[1] + i3*nb[2] + offset_elem] =
31-
src[i0 + i1*src_nb[0] + i2*src_nb[1] + i3*src_nb[2]];
32-
}
33-
34-
// Main function: prepare GPU queue and launch parallel_for
35-
void ggml_sycl_op_set(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
36-
const ggml_tensor* src0 = dst->src[0];
37-
const ggml_tensor* src1 = dst->src[1];
38-
39-
// Ensure shapes and types are compatible
40-
GGML_ASSERT(ggml_are_same_shape(src0, dst));
41-
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
42-
GGML_ASSERT(dst->type == src0->type && src0->type == src1->type && dst->type == GGML_TYPE_F32);
43-
44-
const int32_t* opts = (const int32_t*) dst->op_params;
45-
const int64_t nb[3] = {opts[0]/sizeof(float), opts[1]/sizeof(float), opts[2]/sizeof(float)};
46-
const int64_t offset_elem = opts[3] / sizeof(float);
47-
const bool inplace = opts[4];
48-
49-
float* dst_ptr = (float*) dst->data;
50-
const float* src0_ptr = (const float*) src0->data;
51-
const float* src1_ptr = (const float*) src1->data;
52-
53-
queue_ptr stream = ctx.stream();
54-
55-
// Copy src0 to dst if not inplace
56-
if (!inplace)
57-
stream->memcpy(dst_ptr, src0_ptr, ggml_nbytes(dst));
58-
59-
const int64_t ne[4] = {src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]};
60-
const int64_t src_nb[3] = {src1->nb[1]/sizeof(float), src1->nb[2]/sizeof(float), src1->nb[3]/sizeof(float)};
61-
62-
const size_t total_threads = ne[0]*ne[1]*ne[2]*ne[3];
63-
const size_t grid_size = ((total_threads + SYCL_SET_BLOCK_SIZE - 1) / SYCL_SET_BLOCK_SIZE) * SYCL_SET_BLOCK_SIZE;
64-
65-
// Copy src0 to dst if not inplace
66-
stream->parallel_for(
67-
nd_range<1>(range<1>(grid_size), range<1>(SYCL_SET_BLOCK_SIZE)),
68-
[=](nd_item<1> item) {
69-
set_f32(src1_ptr, dst_ptr,
70-
ne[0], ne[1], ne[2], ne[3],
71-
nb, src_nb, offset_elem, item); }
72-
);
73-
}
1+
#include "presets.hpp"
2+
#include "common.hpp"
3+
#include "ggml.h"
4+
#include "set.hpp"
5+
#include <cstdint>
6+
#include <sycl/sycl.hpp>
7+
using namespace sycl;
8+
9+
// Internal function: perform element-wise set operation for each thread
10+
inline void set_f32(const float* src, float* dst,
11+
const int64_t ne0, const int64_t ne1,
12+
const int64_t ne2, const int64_t ne3,
13+
const int64_t nb[3], const int64_t src_nb[3],
14+
const int64_t offset_elem,
15+
const nd_item<1>& item)
16+
{
17+
const size_t idx = item.get_global_id(0);
18+
const size_t total = ne0 * ne1 * ne2 * ne3;
19+
if (idx >= total) return;
20+
21+
// Convert linear index to 4D indices
22+
const size_t i3 = idx / (ne2 * ne1 * ne0);
23+
const size_t rem = idx % (ne2 * ne1 * ne0);
24+
const size_t i2 = rem / (ne1 * ne0);
25+
const size_t rem2 = rem % (ne1 * ne0);
26+
const size_t i1 = rem2 / ne0;
27+
const size_t i0 = rem2 % ne0;
28+
29+
// Compute source and destination indices and copy
30+
dst[i0 + i1*nb[0] + i2*nb[1] + i3*nb[2] + offset_elem] =
31+
src[i0 + i1*src_nb[0] + i2*src_nb[1] + i3*src_nb[2]];
32+
}
33+
34+
// Main function: prepare GPU queue and launch parallel_for
35+
void ggml_sycl_op_set(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
36+
const ggml_tensor* src0 = dst->src[0];
37+
const ggml_tensor* src1 = dst->src[1];
38+
39+
// Ensure shapes and types are compatible
40+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
41+
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
42+
GGML_ASSERT(dst->type == src0->type && src0->type == src1->type && dst->type == GGML_TYPE_F32);
43+
44+
const int32_t* opts = (const int32_t*) dst->op_params;
45+
const int64_t nb[3] = {opts[0]/sizeof(float), opts[1]/sizeof(float), opts[2]/sizeof(float)};
46+
const int64_t offset_elem = opts[3] / sizeof(float);
47+
const bool inplace = opts[4];
48+
49+
float* dst_ptr = (float*) dst->data;
50+
const float* src0_ptr = (const float*) src0->data;
51+
const float* src1_ptr = (const float*) src1->data;
52+
53+
queue_ptr stream = ctx.stream();
54+
55+
// Copy src0 to dst if not inplace
56+
if (!inplace)
57+
stream->memcpy(dst_ptr, src0_ptr, ggml_nbytes(dst));
58+
59+
const int64_t ne[4] = {src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]};
60+
const int64_t src_nb[3] = {src1->nb[1]/sizeof(float), src1->nb[2]/sizeof(float), src1->nb[3]/sizeof(float)};
61+
62+
const size_t total_threads = ne[0]*ne[1]*ne[2]*ne[3];
63+
const size_t grid_size = ((total_threads + SYCL_SET_BLOCK_SIZE - 1) / SYCL_SET_BLOCK_SIZE) * SYCL_SET_BLOCK_SIZE;
64+
65+
// Copy src0 to dst if not inplace
66+
stream->parallel_for(
67+
nd_range<1>(range<1>(grid_size), range<1>(SYCL_SET_BLOCK_SIZE)),
68+
[=](nd_item<1> item) {
69+
set_f32(src1_ptr, dst_ptr,
70+
ne[0], ne[1], ne[2], ne[3],
71+
nb, src_nb, offset_elem, item); }
72+
);
73+
}

ggml/src/ggml-sycl/set.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#pragma once
2-
#include "backend.hpp"
3-
#include "ggml.h"
4-
5-
void ggml_sycl_op_set(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
1+
#pragma once
2+
#include "backend.hpp"
3+
#include "ggml.h"
4+
5+
void ggml_sycl_op_set(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

0 commit comments

Comments
 (0)