@@ -555,8 +555,8 @@ static enum ggml_status ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer
 
     if (ggml_is_quantized(tensor->type) && tensor->view_src == nullptr && ggml_backend_buffer_get_usage(buffer) != GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
         // initialize padding to 0 to avoid possible NaN values
-        size_t original_size = ggml_nbytes(tensor);
-        size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
+        const size_t original_size = ggml_nbytes(tensor);
+        const size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor);
 
         if (padded_size > original_size) {
             ggml_cuda_set_device(ctx->device);
@@ -679,6 +679,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
 
     if (ggml_is_quantized(tensor->type)) {
         if (ne0 % MATRIX_ROW_PADDING != 0) {
+            GGML_ASSERT(tensor->nb[0] == ggml_element_size(tensor));
             size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
         }
     }
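
The new assert in this hunk pins down the assumption behind the extra row: `ggml_row_size` only describes the real layout when elements are packed, i.e. nb[0] equals the element size. Below is a minimal standalone sketch of the padding arithmetic itself, not ggml code: it assumes MATRIX_ROW_PADDING is 512 elements (the value used in ggml-cuda) and hardcodes Q4_0's layout of 18-byte blocks covering 32 elements; row_size_q4_0 is a hypothetical stand-in for ggml_row_size.

// Sketch only: mirrors the alloc-size padding above for a Q4_0 tensor.
#include <cstdint>
#include <cstdio>

static const int64_t MATRIX_ROW_PADDING = 512; // assumed, per ggml-cuda
static const int64_t QK4_0       = 32;         // elements per Q4_0 block
static const int64_t BLOCK_BYTES = 18;         // bytes per Q4_0 block

// stand-in for ggml_row_size(GGML_TYPE_Q4_0, ne); ne must be a multiple of QK4_0
static int64_t row_size_q4_0(int64_t ne) {
    return ne / QK4_0 * BLOCK_BYTES;
}

int main() {
    const int64_t ne0  = 800;                // multiple of QK4_0, not of MATRIX_ROW_PADDING
    int64_t       size = row_size_q4_0(ne0); // 450 bytes of real data
    if (ne0 % MATRIX_ROW_PADDING != 0) {
        size += row_size_q4_0(MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
    }
    printf("allocated size: %lld bytes\n", (long long) size); // 576: rounded up to 1024 elements
    return 0;
}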
@@ -800,6 +801,7 @@ static void * ggml_backend_cuda_split_buffer_get_base(ggml_backend_buffer_t buff
 
 static enum ggml_status ggml_backend_cuda_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
+    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
 
     ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
     ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *)buffer->buft->context;
@@ -851,6 +853,7 @@ static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buffer_t buff
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
+    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
 
     ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *)buffer->buft->context;
 
@@ -889,6 +892,7 @@ static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buffer_t buff
     // split tensors must always be set in their entirety at once
     GGML_ASSERT(offset == 0);
     GGML_ASSERT(size == ggml_nbytes(tensor));
+    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
 
     ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *)buffer->buft->context;
 
@@ -970,6 +974,7 @@ static size_t ggml_backend_cuda_split_buffer_type_get_alignment(ggml_backend_buf
 
 static size_t ggml_backend_cuda_split_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
     ggml_backend_cuda_split_buffer_type_context * ctx = (ggml_backend_cuda_split_buffer_type_context *)buft->context;
+    GGML_ASSERT(ggml_is_contiguous(tensor) && "split buffers only supported for contiguous tensors");
 
     size_t total_size = 0;
 
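
The contiguity asserts added across the split-buffer path all enforce one precondition: the split code distributes row ranges across devices with plain pointer arithmetic, which is only valid for a dense row-major layout. The sketch below shows roughly the invariant being checked, following the check ggml_is_contiguous performs for non-quantized types (nb[0] equals the element size, each higher stride is the previous stride times the previous dimension); tensor4d is a simplified stand-in for ggml_tensor, not the real struct.

#include <cstdint>
#include <cstddef>

// Simplified stand-in for ggml_tensor: 4 dims, byte strides in nb.
struct tensor4d {
    int64_t ne[4]; // elements per dimension
    size_t  nb[4]; // stride in bytes per dimension
    size_t  element_size;
};

// The invariant the new GGML_ASSERTs enforce: dense row-major layout,
// so that rows i..j can be addressed as data + i*nb[1].
static bool is_contiguous(const tensor4d & t) {
    if (t.nb[0] != t.element_size) {
        return false; // e.g. a transposed view
    }
    for (int i = 1; i < 4; ++i) {
        if (t.nb[i] != t.nb[i - 1] * (size_t) t.ne[i - 1]) {
            return false; // e.g. a sliced view with gaps between rows
        }
    }
    return true;
}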
@@ -2065,6 +2070,7 @@ static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor *
         src0_slice.ne[2] = 1;
         src0_slice.nb[3] = src0_slice.nb[2];
         src0_slice.data  = (char *) src0->data + i02*nb02;
+        GGML_ASSERT(!ggml_cuda_should_use_mmq(src0->type, cc, ne11) || ne00 % MATRIX_ROW_PADDING == 0);
 
         ggml_tensor src1_slice;
         memset(&src1_slice, 0, sizeof(src1_slice));
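
A note on this last hunk: the per-expert slices alias src0's allocation in place, while the zero-initialized padding from the first hunk exists only once, at the end of the whole tensor. Requiring ne00 to be a multiple of MATRIX_ROW_PADDING whenever the MMQ path is selected presumably ensures that no individual slice depends on trailing padding it does not have.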