Commit 34a5017

fix cuda
1 parent c951357 commit 34a5017


47 files changed: 245 additions & 245 deletions
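
Every hunk in this commit applies the same mechanical rewrite: direct reads of a ggml_tensor's data field (e.g. src0->data) become calls to a tensor_data() accessor (e.g. tensor_data(src0)). The accessor's definition is not in the CUDA hunks shown here; it lands elsewhere in the 47 touched files. The snippet below is therefore only a minimal sketch of the shape its call sites imply, assuming a thin wrapper over the raw field; the real definition may add indirection or bookkeeping.

    // Hypothetical sketch only: the call sites in the hunks below imply this
    // signature, but the actual tensor_data() is defined elsewhere in the
    // commit and may do more than return the raw field.
    static inline void * tensor_data(const struct ggml_tensor * tensor) {
        return tensor->data;
    }

Funneling every access through one function is what makes the rest of the commit mechanical: if how tensor storage is located ever changes again (offsets, mapped buffers, and the like), only the accessor has to change, not hundreds of call sites.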

ggml/src/ggml-cuda/acc.cu

Lines changed: 3 additions & 3 deletions
@@ -38,9 +38,9 @@ void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
     const ggml_tensor * src1 = dst->src[1];

-    const float * src0_d = (const float *) src0->data;
-    const float * src1_d = (const float *) src1->data;
-    float * dst_d = (float *) dst->data;
+    const float * src0_d = (const float *) tensor_data(src0);
+    const float * src1_d = (const float *) tensor_data(src1);
+    float * dst_d = (float *) tensor_data(dst);

     cudaStream_t stream = ctx.stream();

ggml/src/ggml-cuda/add-id.cu

Lines changed: 4 additions & 4 deletions
@@ -41,10 +41,10 @@ void ggml_cuda_op_add_id(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(nb10 == sizeof(float));
     GGML_ASSERT(nb20 == sizeof(int32_t));

-    const float * src0_d = (const float *)src0->data;
-    const float * src1_d = (const float *)src1->data;
-    const int32_t * src2_d = (const int32_t *)src2->data;
-    float * dst_d = (float *)dst->data;
+    const float * src0_d = (const float *)tensor_data(src0);
+    const float * src1_d = (const float *)tensor_data(src1);
+    const int32_t * src2_d = (const int32_t *)tensor_data(src2);
+    float * dst_d = (float *)tensor_data(dst);

     int threads = std::min((int)ne00, 768); // cols
     dim3 blocks(ne01, ne02); // n_experts_used, n_tokens

ggml/src/ggml-cuda/arange.cu

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ static void arange_f32_cuda(float * dst, const int ne0, const float start, const
 }

 void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    float * dst_d = (float *)dst->data;
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(dst->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/argmax.cu

Lines changed: 2 additions & 2 deletions
@@ -77,8 +77,8 @@ void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const int64_t ne00 = src0->ne[0];
     const int64_t nrows = ggml_nrows(src0);

-    const float * src0_d = (const float *) src0->data;
-    int32_t * dst_d = (int32_t *) dst->data;
+    const float * src0_d = (const float *) tensor_data(src0);
+    int32_t * dst_d = (int32_t *) tensor_data(dst);

     cudaStream_t stream = ctx.stream();

ggml/src/ggml-cuda/argsort.cu

Lines changed: 2 additions & 2 deletions
@@ -87,8 +87,8 @@ static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, co

 void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
+    const float * src0_d = (const float *)tensor_data(src0);
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/binbcast.cu

Lines changed: 13 additions & 13 deletions
@@ -286,7 +286,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
                 ne12, ne13,
                 /* s0, */ s1, s2, s3,
                 /* s00,*/ s01, s02, s03,
-                /* s10,*/ s11, s12, s13, (const src1_t *) dst->src[I + 1]->data...);
+                /* s10,*/ s11, s12, s13, (const src1_t *) tensor_data(dst->src[I + 1])...);
         } else {
             k_bin_bcast_unravel<bin_op, src0_t, src1_t, dst_t>
                 <<<block_num, block_size, 0, stream>>>(src0_dd, src1_dd, dst_dd, ne0_fastdiv, ne1_fastdiv,
@@ -302,7 +302,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor *
                 src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3_fastdiv, ne10, ne11, ne12, ne13,
                 /* s0, */ s1, s2, s3,
                 /* s00,*/ s01, s02, s03,
-                /* s10,*/ s11, s12, s13, (const src1_t *) dst->src[I + 1]->data...);
+                /* s10,*/ s11, s12, s13, (const src1_t *) tensor_data(dst->src[I + 1])...);
         } else {
             k_bin_bcast<bin_op, src0_t, src1_t, dst_t><<<block_nums, block_dims, 0, stream>>>(
                 src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3_fastdiv, ne10, ne11, ne12, ne13,
@@ -389,23 +389,23 @@ static void ggml_cuda_op_bin_bcast(
 }

 void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_repeat, 0>>(dst, dst->src[0], dst, nullptr, dst->src[0]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_repeat, 0>>(dst, dst->src[0], dst, nullptr, tensor_data(dst->src[0]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_add>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_sub>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_mul>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_mul>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_div>>(dst->src[0], dst->src[1], dst, dst->src[0]->data, dst->src[1]->data, dst->data, ctx.stream());
+    ggml_cuda_op_bin_bcast<bin_bcast_cuda<op_div>>(dst->src[0], dst->src[1], dst, tensor_data(dst->src[0]), tensor_data(dst->src[1]), tensor_data(dst), ctx.stream());
 }

 template <float (*op)(const float, const float), int n_fuse>
@@ -417,19 +417,19 @@ static void ggml_cuda_op_fused_binbcast_impl(ggml_backend_cuda_context & ctx, gg

     if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
         launch_bin_bcast_pack<op, float, float, float>(src0, src1, dst,
-            (const float *) src0->data, (const float *) src1->data, (float *) dst->data,
+            (const float *) tensor_data(src0), (const float *) tensor_data(src1), (float *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
         launch_bin_bcast_pack<op, half, half, half>(src0, src1, dst,
-            (const half *) src0->data, (const half *) src1->data, (half *) dst->data,
+            (const half *) tensor_data(src0), (const half *) tensor_data(src1), (half *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16) {
         launch_bin_bcast_pack<op, half, float, half>(src0, src1, dst,
-            (const half *) src0->data, (const float *) src1->data, (half *) dst->data,
+            (const half *) tensor_data(src0), (const float *) tensor_data(src1), (half *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
         launch_bin_bcast_pack<op, half, float, float>(src0, src1, dst,
-            (const half *) src0->data, (const float *) src1->data, (float *) dst->data,
+            (const half *) tensor_data(src0), (const float *) tensor_data(src1), (float *) tensor_data(dst),
             stream, std::make_index_sequence<n_fuse>{});
     } else {
         fprintf(stderr,
@@ -491,8 +491,8 @@ void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst

     switch (dst->type) {
         case GGML_TYPE_F32: {
-            const float * src0_d = (const float *) src0->data;
-            float * dst_d = (float *) dst->data;
+            const float * src0_d = (const float *) tensor_data(src0);
+            float * dst_d = (float *) tensor_data(dst);
             repeat_back_cuda(src0_d, dst_d, ne00, ne01, ne02, ne03, s00, s01, s02, s03, ne0, ne1, ne2, ne3, stream);
         } break;
         default: {
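
The trickiest call sites in the binbcast hunks above are the ones inside launch_bin_bcast_pack: the fused extra operands travel through a C++ parameter-pack expansion over std::index_sequence, so (const src1_t *) tensor_data(dst->src[I + 1])... expands to one pointer argument per fused source. Below is a minimal, self-contained sketch of that expansion pattern; kernel_stub and launch are illustrative stand-ins, not ggml code.

    #include <cstdio>
    #include <utility>

    // One variadic "kernel": the pack carries the fused operand pointers.
    template <typename... Extra>
    void kernel_stub(const float * a, const float * b, Extra... extra) {
        std::printf("launched with %zu fused operand(s)\n", sizeof...(extra));
    }

    // srcs[I + 1]... expands to srcs[1], srcs[2], ..., mirroring how the
    // hunks above append (const src1_t *) tensor_data(dst->src[I + 1])...
    template <std::size_t... I>
    void launch(const float * const * srcs, std::index_sequence<I...>) {
        kernel_stub(srcs[0], srcs[1], srcs[I + 1]...);
    }

    int main() {
        const float a = 1.0f, b = 2.0f, c = 3.0f;
        const float * srcs[] = { &a, &b, &c };
        launch(srcs, std::make_index_sequence<2>{});  // n_fuse == 2
        return 0;
    }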

ggml/src/ggml-cuda/clamp.cu

Lines changed: 2 additions & 2 deletions
@@ -24,8 +24,8 @@ static void clamp_cuda(const T * x, T * dst, const T min, const T max, const int

 void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const void * src0_d = src0->data;
-    void * dst_d = dst->data;
+    const void * src0_d = tensor_data(src0);
+    void * dst_d = tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);

ggml/src/ggml-cuda/concat.cu

Lines changed: 4 additions & 4 deletions
@@ -167,10 +167,10 @@ void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
-        const float * src0_d = (const float *)src0->data;
-        const float * src1_d = (const float *)src1->data;
+        const float * src0_d = (const float *)tensor_data(src0);
+        const float * src1_d = (const float *)tensor_data(src1);

-        float * dst_d = (float *)dst->data;
+        float * dst_d = (float *)tensor_data(dst);

         if (dim != 3) {
             for (int i3 = 0; i3 < dst->ne[3]; i3++) {
@@ -192,7 +192,7 @@ void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
         dim3 grid_dim(dst->ne[1], dst->ne[2], dst->ne[3]);
         auto launch_kernel = [&](auto dim) {
             concat_f32_non_cont<dim><<<grid_dim, CUDA_CONCAT_BLOCK_SIZE, 0, stream>>>(
-                (const char *) src0->data, (const char *) src1->data, (char *) dst->data,
+                (const char *) tensor_data(src0), (const char *) tensor_data(src1), (char *) tensor_data(dst),
                 src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3],
                 src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
                 src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],

ggml/src/ggml-cuda/conv-transpose-1d.cu

Lines changed: 3 additions & 3 deletions
@@ -56,12 +56,12 @@ static void conv_transpose_1d_f32_f32_cuda(

 void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
+    const float * src0_d = (const float *)tensor_data(src0);

     const ggml_tensor * src1 = dst->src[1];
-    const float * src1_d = (const float *)src1->data;
+    const float * src1_d = (const float *)tensor_data(src1);

-    float * dst_d = (float *)dst->data;
+    float * dst_d = (float *)tensor_data(dst);
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);

ggml/src/ggml-cuda/conv2d-dw.cu

Lines changed: 3 additions & 3 deletions
@@ -121,9 +121,9 @@ void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     const ggml_tensor * input = dst->src[1];

     GGML_ASSERT(kernel->type == GGML_TYPE_F32 && input->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
-    const float * w_d = (const float *) kernel->data;
-    const float * x_d = (const float *) input->data;
-    float * y_d = (float *) dst->data;
+    const float * w_d = (const float *) tensor_data(kernel);
+    const float * x_d = (const float *) tensor_data(input);
+    float * y_d = (float *) tensor_data(dst);

     const int32_t * p = (const int32_t *) dst->op_params;
     const int stride_x = p[0];
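
A detail worth noticing in the conv2d-dw hunk (and throughout the commit): only tensor storage moves behind tensor_data(); scalar operator parameters are still read directly from the op_params field, as the unchanged trailing context lines show. Below is a sketch of the resulting convention, assuming the ggml-cuda headers are in scope as in the real .cu files (example_op_prologue is a made-up name):

    // Sketch only: storage pointers go through the accessor introduced by
    // this commit, while scalar immediates remain direct field reads.
    static void example_op_prologue(ggml_tensor * dst) {
        const ggml_tensor * kernel = dst->src[0];

        // tensor storage: always through the accessor
        const float * w_d = (const float *) tensor_data(kernel);

        // scalar immediates: still read straight from op_params
        const int32_t * p = (const int32_t *) dst->op_params;
        const int stride_x = p[0];

        (void) w_d; (void) stride_x;  // prologue only; kernel launch omitted
    }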
