Commit 34a5017

fix cuda
1 parent c951357 commit 34a5017


47 files changed: 245 additions & 245 deletions
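
Every hunk in this commit applies the same mechanical rewrite: direct reads of a ggml_tensor's data field (e.g. src0->data) become calls to a tensor_data() accessor (e.g. tensor_data(src0)). The accessor's definition is not in the CUDA hunks shown here; it lands elsewhere in the 47 touched files. The snippet below is therefore only a minimal sketch of the shape its call sites imply, assuming a thin wrapper over the raw field; the real definition may add indirection or bookkeeping.

    // Hypothetical sketch only: the call sites in the hunks below imply this
    // signature, but the actual tensor_data() is defined elsewhere in the
    // commit and may do more than return the raw field.
    static inline void * tensor_data(const struct ggml_tensor * tensor) {
        return tensor->data;
    }

Funneling every access through one function is what makes the rest of the commit mechanical: if how tensor storage is located ever changes again (offsets, mapped buffers, and the like), only the accessor has to change, not hundreds of call sites.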

ggml/src/ggml-cuda/acc.cu

Lines changed: 3 additions & 3 deletions
@@ -38,9 +38,9 @@ void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
     const ggml_tensor * src1 = dst->src[1];

-    const float * src0_d = (const float *) src0->data;
-    const float * src1_d = (const float *) src1->data;
-    float * dst_d = (float *) dst->data;
+    const float * src0_d = (const float *) tensor_data(src0);
+    const float * src1_d = (const float *) tensor_data(src1);
+    float * dst_d = (float *) tensor_data(dst);

     cudaStream_t stream = ctx.stream();

ggml/src/ggml-cuda/add-id.cu

Lines changed: 4 additions & 4 deletions
@@ -41,10 +41,10 @@ void ggml_cuda_op_add_id(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(nb10 == sizeof(float));
     GGML_ASSERT(nb20 == sizeof(int32_t));

-    const float * src0_d = (const float *)src0->data;
-    const float * src1_d = (const float *)src1->data;
-    const int32_t * src2_d = (const int32_t *)src2->data;
-    float * dst_d = (float *)dst->data;
+    const float * src0_d = (const float *)tensor_data(src0);
+    const float * src1_d = (const float *)tensor_data(src1);
+    const int32_t * src2_d = (const int32_t *)tensor_data(src2);
+    float * dst_d = (float *)tensor_data(dst);

     int threads = std::min((int)ne00, 768); // cols
     dim3 blocks(ne01, ne02); // n_experts_used, n_tokens

ggml/src/ggml-cuda/arange.cu

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ static void arange_f32_cuda(float * dst, const int ne0, const float start, const
 }

 void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    float * dst_d = (float *)dst->data;
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(dst->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/argmax.cu

Lines changed: 2 additions & 2 deletions
@@ -77,8 +77,8 @@ void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const int64_t ne00 = src0->ne[0];
     const int64_t nrows = ggml_nrows(src0);

-    const float * src0_d = (const float *) src0->data;
-    int32_t * dst_d = (int32_t *) dst->data;
+    const float * src0_d = (const float *) tensor_data(src0);
+    int32_t * dst_d = (int32_t *) tensor_data(dst);

     cudaStream_t stream = ctx.stream();

ggml/src/ggml-cuda/argsort.cu

Lines changed: 2 additions & 2 deletions
@@ -87,8 +87,8 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co

 void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
+    const float * src0_d = (const float *)tensor_data(src0);
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/binbcast.cu

Lines changed: 13 additions & 13 deletions
@@ -286,7 +286,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
                 ne12, ne13,
                 /* s0, */ s1, s2, s3,
                 /* s00,*/ s01, s02, s03,
-                /* s10,*/ s11, s12, s13, (const src1_t *) dst->src[I + 1]->data...);
+                /* s10,*/ s11, s12, s13, (const src1_t *) tensor_data(dst->src[I + 1])...);
         } else {
             k_bin_bcast_unravel<bin_op, src0_t, src1_t, dst_t>
                 <<<block_num, block_size, 0, stream>>>(src0_dd, src1_dd, dst_dd, ne0_fastdiv, ne1_fastdiv,
@@ -302,7 +302,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
                 src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3_fastdiv, ne10, ne11, ne12, ne13,
                 /* s0, */ s1, s2, s3,
                 /* s00,*/ s01, s02, s03,
-                /* s10,*/ s11, s12, s13, (const src1_t *) dst->src[I + 1]->data...);
+                /* s10,*/ s11, s12, s13, (const src1_t *) tensor_data(dst->src[I + 1])...);
         } else {
             k_bin_bcast<bin_op, src0_t, src1_t, dst_t><<<block_nums, block_dims, 0, stream>>>(
                 src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3_fastdiv, ne10, ne11, ne12, ne13,
@@ -389,23 +389,23 @@ static void ggml_cuda_op_bin_bcast(
 }

 void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_repeat, 0>>(dst, dst->src[0], dst, nullptr, dst->src[0]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_repeat, 0>>(dst, dst->src[0], dst, nullptr, tensor_data(dst->src[0]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_mul>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_mul>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_div>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_div>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 template <float (*op)(const float, const float), int n_fuse>
@@ -417,19 +417,19 @@ static void ggml_cuda_op_fused_binbcast_impl(ggml_backend_cuda_context & ctx, gg

     if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
         launch_bin_bcast_pack<op, float, float, float>(src0, src1, dst,
-            (const float *) src0->data, (const float *) src1->data, (float *) dst->data,
+            (const float *) tensor_data(src0), (const float *) tensor_data(src1), (float *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
         launch_bin_bcast_pack<op, half, half, half>(src0, src1, dst,
-            (const half *) src0->data, (const half *) src1->data, (half *) dst->data,
+            (const half *) tensor_data(src0), (const half *) tensor_data(src1), (half *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16) {
         launch_bin_bcast_pack<op, half, float, half>(src0, src1, dst,
-            (const half *) src0->data, (const float *) src1->data, (half *) dst->data,
+            (const half *) tensor_data(src0), (const float *) tensor_data(src1), (half *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
         launch_bin_bcast_pack<op, half, float, float>(src0, src1, dst,
-            (const half *) src0->data, (const float *) src1->data, (float *) dst->data,
+            (const half *) tensor_data(src0), (const float *) tensor_data(src1), (float *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else {
         fprintf(stderr,
@@ -491,8 +491,8 @@ void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst

     switch (dst->type) {
         case GGML_TYPE_F32: {
-            const float * src0_d = (const float *) src0->data;
-            float * dst_d = (float *) dst->data;
+            const float * src0_d = (const float *) tensor_data(src0);
+            float * dst_d = (float *) tensor_data(dst);
             repeat_back_cuda(src0_d, dst_d, ne00, ne01, ne02, ne03, s00, s01, s02, s03, ne0, ne1, ne2, ne3, stream);
         } break;
         default: {
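
The trickiest call sites in the binbcast hunks above are the ones inside launch_bin_bcast_pack: the fused extra operands travel through a C++ parameter-pack expansion over std::index_sequence, so (const src1_t *) tensor_data(dst->src[I + 1])... expands to one pointer argument per fused source. Below is a minimal, self-contained sketch of that expansion pattern; kernel_stub and launch are illustrative stand-ins, not ggml code.

    #include <cstdio>
    #include <utility>

    // One variadic "kernel": the pack carries the fused operand pointers.
    template <typename... Extra>
    void kernel_stub(const float * a, const float * b, Extra... extra) {
        std::printf("launched with %zu fused operand(s)\n", sizeof...(extra));
    }

    // srcs[I + 1]... expands to srcs[1], srcs[2], ..., mirroring how the
    // hunks above append (const src1_t *) tensor_data(dst->src[I + 1])...
    template <std::size_t... I>
    void launch(const float * const * srcs, std::index_sequence<I...>) {
        kernel_stub(srcs[0], srcs[1], srcs[I + 1]...);
    }

    int main() {
        const float a = 1.0f, b = 2.0f, c = 3.0f;
        const float * srcs[] = { &a, &b, &c };
        launch(srcs, std::make_index_sequence<2>{});  // n_fuse == 2
        return 0;
    }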

ggml/src/ggml-cuda/clamp.cu

Lines changed: 2 additions & 2 deletions
@@ -24,8 +24,8 @@ static void clamp_cuda(const T * x, T * dst, const T min, const T max, const int

 void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const void * src0_d = src0->data;
-    void * dst_d = dst->data;
+    const void * src0_d = tensor_data(src0);
+    void * dst_d = tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);

ggml/src/ggml-cuda/concat.cu

Lines changed: 4 additions & 4 deletions
@@ -167,10 +167,10 @@ void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
-        const float * src0_d = (const float *)src0->data;
-        const float * src1_d = (const float *)src1->data;
+        const float * src0_d = (const float *)tensor_data(src0);
+        const float * src1_d = (const float *)tensor_data(src1);

-        float * dst_d = (float *)dst->data;
+        float * dst_d = (float *)tensor_data(dst);

         if (dim != 3) {
             for (int i3 = 0; i3 < dst->ne[3]; i3++) {
@@ -192,7 +192,7 @@ void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
         dim3 grid_dim(dst->ne[1], dst->ne[2], dst->ne[3]);
         auto launch_kernel = [&](auto dim) {
             concat_f32_non_cont<dim><<<grid_dim, CUDA_CONCAT_BLOCK_SIZE, 0, stream>>>(
-                (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
+                (const char *) tensor_data(src0), (const char *) tensor_data(src1), (char *) tensor_data(dst),
                 src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
                 src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
                 src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],

ggml/src/ggml-cuda/conv-transpose-1d.cu

Lines changed: 3 additions & 3 deletions
@@ -56,12 +56,12 @@ static void conv_transpose_1d_f32_f32_cuda(

 void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
+    const float * src0_d = (const float *)tensor_data(src0);

     const ggml_tensor * src1 = dst->src[1];
-    const float * src1_d = (const float *)src1->data;
+    const float * src1_d = (const float *)tensor_data(src1);

-    float * dst_d = (float *)dst->data;
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/conv2d-dw.cu

Lines changed: 3 additions & 3 deletions
@@ -121,9 +121,9 @@ void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     const ggml_tensor * input = dst->src[1];

     GGML_ASSERT(kernel->type == GGML_TYPE_F32 && input->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
-    const float * w_d = (const float *) kernel->data;
-    const float * x_d = (const float *) input->data;
-    float * y_d = (float *) dst->data;
+    const float * w_d = (const float *) tensor_data(kernel);
+    const float * x_d = (const float *) tensor_data(input);
+    float * y_d = (float *) tensor_data(dst);

     const int32_t * p = (const int32_t *) dst->op_params;
     const int stride_x = p[0];
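
A detail worth noticing in the conv2d-dw hunk (and throughout the commit): only tensor storage moves behind tensor_data(); scalar operator parameters are still read directly from the op_params field, as the unchanged trailing context lines show. Below is a sketch of the resulting convention, assuming the ggml-cuda headers are in scope as in the real .cu files (example_op_prologue is a made-up name):

    // Sketch only: storage pointers go through the accessor introduced by
    // this commit, while scalar immediates remain direct field reads.
    static void example_op_prologue(ggml_tensor * dst) {
        const ggml_tensor * kernel = dst->src[0];

        // tensor storage: always through the accessor
        const float * w_d = (const float *) tensor_data(kernel);

        // scalar immediates: still read straight from op_params
        const int32_t * p = (const int32_t *) dst->op_params;
        const int stride_x = p[0];

        (void) w_d; (void) stride_x;  // prologue only; kernel launch omitted
    }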
