Skip to content

Commit 30839a3

Browse files
author
Bodhi Hu
committed
update comments
1 parent ad99063 commit 30839a3

File tree

2 files changed

+2
-10
lines changed

2 files changed

+2
-10
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1358,14 +1358,9 @@ static cudaError_t ggml_cuda_Memcpy2DPeerAsync(
13581358
#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA)
13591359
}
13601360

1361-
// CLI:
1362-
// ./build_musa/bin/llama-cli -m ~/models/deepseek-r1-7B-Q4_K_M.gguf -ngl 28 -t 8 -p "摩尔线程是一家HQ北京的国产GPU 及 AI 公司,他们正在" -n 10 -no-cnv --cache-type-k q8_0 -fa
13631361
static double ticks_total, ticks_quant, ticks_op;
13641362
// stats: | ticks_total | ticks_quant | ticks_mul_mat
1365-
// base | | |
1366-
// | | |
1367-
// dnn-shfl | 2.119177099 | 0.019638826 | 2.096463255
1368-
// | | 0.92% | 98.93%
1363+
// | 2.119177099 | 0.019638826 | 2.096463255
13691364
static void ggml_cuda_op_mul_mat(
13701365
ggml_backend_cuda_context & ctx,
13711366
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, ggml_cuda_op_mul_mat_t op,

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ static constexpr __device__ int get_vdr_mmvq(ggml_type type) {
4747
1;
4848
}
4949

50-
<<<<<<< HEAD
5150
enum mmvq_parameter_table_id {
5251
MMVQ_PARAMETERS_GENERIC = 0,
5352
MMVQ_PARAMETERS_GCN,
@@ -127,10 +126,8 @@ static constexpr __host__ __device__ int calc_rows_per_block(int ncols_y, int ta
127126
}
128127
return 1;
129128
}
130-
=======
131-
static __device__ uint64_t ticks_total = 0, ticks_vecdotq = 0, ticks_reduce_sum = 0;
132-
>>>>>>> c9e3fd9c (MUSA: enable fastfp16, correct warp reduce impl and other changes)
133129

130+
static __device__ uint64_t ticks_total = 0, ticks_vecdotq = 0, ticks_reduce_sum = 0;
134131
template <ggml_type type, int ncols_y>
135132
// tell the compiler to use as many registers as it wants, see nwarps definition below
136133
__launch_bounds__(calc_nwarps(ncols_y, get_device_table_id())*ggml_cuda_get_physical_warp_size(), 1)

0 commit comments

Comments
 (0)