@@ -6074,9 +6074,9 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
6074
6074
return false ;
6075
6075
}
6076
6076
6077
- void forward_get_rows (const ggml_compute_params *params,
6078
- ggml_tensor *dst) {
6079
- const ggml_tensor *src0 = dst->src [0 ];
6077
+ void forward_get_rows (const ggml_compute_params * params,
6078
+ ggml_tensor * dst) {
6079
+ const ggml_tensor * src0 = dst->src [0 ];
6080
6080
6081
6081
switch (src0->type ) {
6082
6082
case GGML_TYPE_Q4_0: {
@@ -6089,10 +6089,10 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
6089
6089
}
6090
6090
6091
6091
static void ggml_compute_forward_get_rows_q4_0x8 (
6092
- const ggml_compute_params *params,
6093
- ggml_tensor *dst) {
6094
- const ggml_tensor *src0 = dst->src [0 ];
6095
- const ggml_tensor *src1 = dst->src [1 ];
6092
+ const ggml_compute_params * params,
6093
+ ggml_tensor * dst) {
6094
+ const ggml_tensor * src0 = dst->src [0 ];
6095
+ const ggml_tensor * src1 = dst->src [1 ];
6096
6096
6097
6097
GGML_TENSOR_BINARY_OP_LOCALS
6098
6098
@@ -6132,10 +6132,10 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
6132
6132
int row_group_idx = i01 / nrows_interleaved;
6133
6133
const int row_idx_in_group = i01 % nrows_interleaved;
6134
6134
6135
- const char *base_ptr_for_higher_dims_in_src0 = (const char *)src0->data + i11 * nb02 + i12 * nb03;
6135
+ const char * base_ptr_for_higher_dims_in_src0 = (const char *)src0->data + i11 * nb02 + i12 * nb03;
6136
6136
6137
6137
// Pointer to the first block_q4_0x8 of the identified row_group_idx
6138
- const block_q4_0x8 *p_first_repacked_block_of_group_x8 = (const block_q4_0x8 *)(base_ptr_for_higher_dims_in_src0 + row_group_idx * stride_between_actual_row_groups);
6138
+ const block_q4_0x8 * p_first_repacked_block_of_group_x8 = (const block_q4_0x8 *)(base_ptr_for_higher_dims_in_src0 + row_group_idx * stride_between_actual_row_groups);
6139
6139
6140
6140
dequantize_row_q4_0x8 (
6141
6141
p_first_repacked_block_of_group_x8,
@@ -6152,8 +6152,8 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
6152
6152
* @param row_idx_in_group Index (0-7) of the logical row to dequantize.
6153
6153
*/
6154
6154
static void dequantize_row_q4_0x8 (
6155
- const block_q4_0x8 *GGML_RESTRICT p_repacked_group_column_blocks,
6156
- float *GGML_RESTRICT y,
6155
+ const block_q4_0x8 * GGML_RESTRICT p_repacked_group_column_blocks,
6156
+ float * GGML_RESTRICT y,
6157
6157
int64_t k,
6158
6158
int row_idx_in_group) {
6159
6159
const int GGML_Q4_0_X8_INTERLEAVE_SIZE = 8 ;
@@ -6168,23 +6168,23 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
6168
6168
const int qk4_0_half_elements = QK4_0 / 2 ;
6169
6169
6170
6170
for (int i = 0 ; i < nb; ++i) {
6171
- const block_q4_0x8 *current_column_repacked_block = &p_repacked_group_column_blocks[i];
6171
+ const block_q4_0x8 * current_column_repacked_block = &p_repacked_group_column_blocks[i];
6172
6172
const float d_val = GGML_FP16_TO_FP32 (current_column_repacked_block->d [row_idx_in_group]);
6173
- float *y_curr = y + i * QK4_0;
6173
+ float * y_curr = y + i * QK4_0;
6174
6174
6175
- const int8_t *qs_first_half_repacked_ptr = &(current_column_repacked_block->qs [row_idx_in_group * bytes_for_half_elements]);
6175
+ const int8_t * qs_first_half_repacked_ptr = &(current_column_repacked_block->qs [row_idx_in_group * bytes_for_half_elements]);
6176
6176
6177
6177
uint64_t first_half_chunk_u64;
6178
6178
memcpy (&first_half_chunk_u64, qs_first_half_repacked_ptr, sizeof (uint64_t ));
6179
6179
first_half_chunk_u64 ^= xor_mask; // Reverse the XOR
6180
- const uint8_t *original_qs_first_half_bytes = (const uint8_t *)&first_half_chunk_u64;
6180
+ const uint8_t * original_qs_first_half_bytes = (const uint8_t *)&first_half_chunk_u64;
6181
6181
6182
- const int8_t *qs_second_half_repacked_ptr = &(current_column_repacked_block->qs [offset_to_second_half_data + (row_idx_in_group * bytes_for_half_elements)]);
6182
+ const int8_t * qs_second_half_repacked_ptr = &(current_column_repacked_block->qs [offset_to_second_half_data + (row_idx_in_group * bytes_for_half_elements)]);
6183
6183
6184
6184
uint64_t second_half_chunk_u64;
6185
6185
memcpy (&second_half_chunk_u64, qs_second_half_repacked_ptr, sizeof (uint64_t ));
6186
6186
second_half_chunk_u64 ^= xor_mask; // Reverse the XOR
6187
- const uint8_t *original_qs_second_half_bytes = (const uint8_t *)&second_half_chunk_u64;
6187
+ const uint8_t * original_qs_second_half_bytes = (const uint8_t *)&second_half_chunk_u64;
6188
6188
6189
6189
// dequantizing all QK4_0's for this block.
6190
6190
for (int j = 0 ; j < bytes_for_half_elements; ++j) {
@@ -6530,10 +6530,10 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
6530
6530
// if (op->src[1]->type == GGML_TYPE_Q8_0) {
6531
6531
// return true;
6532
6532
// }
6533
- } else if (op->op == GGML_OP_GET_ROWS
6534
- && op->src [0 ]->buffer
6535
- && (ggml_n_dims (op->src [0 ]) == 2 )
6536
- && op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type ()
6533
+ } else if (op->op == GGML_OP_GET_ROWS
6534
+ && op->src [0 ]->buffer
6535
+ && (ggml_n_dims (op->src [0 ]) == 2 )
6536
+ && op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type ()
6537
6537
&& ggml_aarch64_get_optimal_repack_type (op->src [0 ])) {
6538
6538
if (op->src [1 ]->buffer && !ggml_backend_buft_is_host (op->src [1 ]->buffer ->buft )) {
6539
6539
return false ;
0 commit comments