Skip to content

Commit d7cf477

Browse files
metax666, duqimeng, StareAtYou, jxwangmetax, zhang-chenyi
authored
[Metax] fix paddle bug register bce_loss_grad & bce_loss & index_add_grad kernels (#1907)
Co-authored-by: sw <[email protected]>
Co-authored-by: duqimeng <[email protected]>
Co-authored-by: Mingkun.Zhang <[email protected]>
Co-authored-by: jiaxinWang-metax <[email protected]>
Co-authored-by: MingkunZhang <[email protected]>
Co-authored-by: chezhang <[email protected]>
Co-authored-by: zhang-chenyi <[email protected]>
1 parent 0c31424 commit d7cf477

File tree

73 files changed

+8870
-1312
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+8870
-1312
lines changed

backends/metax_gpu/CMakeLists.txt

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ file(
111111
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/cuda/cuda_graph.cc
112112
# Core
113113
${PADDLE_SOURCE_DIR}/paddle/phi/core/enforce.cc
114+
${PADDLE_SOURCE_DIR}/paddle/phi/core/mixed_vector.cc
114115
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cusparse.cc
115116
# kernels/Funcs
116117
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/*.cu
@@ -163,13 +164,11 @@ file(
163164
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diag_grad_kernel.cu
164165
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/einsum_kernel.cu
165166
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/einsum_grad_kernel.cu
166-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
167167
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu
168168
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/nvjpeg.cc
169169
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cupti.cc
170170
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_kernel.cu
171171
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
172-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_with_scaled_gradient_grad_kernel_register.cu
173172
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_kernel.cu
174173
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_grad_kernel.cu
175174
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_as_grad_kernel.cu
@@ -239,6 +238,8 @@ file(
239238
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/where_grad_kernel.cu
240239
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/where_kernel.cu
241240
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/empty_kernel.cc
241+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lerp_grad_kernel.cu
242+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lerp_kernel.cu
242243
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_kernel.cc
243244
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_grad_kernel.cc
244245
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_all_kernel.cc
@@ -406,7 +407,6 @@ file(
406407
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/radam_kernel.cu
407408
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/random_routing_kernel.cu
408409
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/renorm_grad_kernel.cu
409-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rmsprop_kernel.cu
410410
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scale_kernel.cu
411411
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/randperm_kernel.cu
412412
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_as_grad_kernel.cu
@@ -461,8 +461,10 @@ file(
461461
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unfold_kernel.cu
462462
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unfold_grad_kernel.cu
463463
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unpool_kernel.cu
464+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lstsq_kernel.cu
464465
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unpool_grad_kernel.cu
465466
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unstack_grad_kernel_register.cu
467+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stack_grad_kernel.cu
466468
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unstack_kernel.cu
467469
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
468470
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/warprnnt_grad_kernel.cu
@@ -473,6 +475,7 @@ file(
473475
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_kernel.cu
474476
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_grad_kernel.cu
475477
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/llm_int8_linear_kernel.cu
478+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/baddbmm_kernel.cu
476479
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/baddbmm_grad_kernel.cu
477480
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_kernel.cu
478481
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_combine_kernel.cu
@@ -481,6 +484,13 @@ file(
481484
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/save_kernel.cu
482485
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dropout_kernel.cu
483486
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dropout_grad_kernel.cu
487+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_add_grad_kernel.cu
488+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_kernel.cu
489+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu
490+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc
491+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc
492+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad3d_grad_kernel.cu
493+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad3d_kernel.cu
484494
# ############################################################################
485495
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/array_grad_kernel.cc
486496
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/set_kernel.cc
@@ -547,6 +557,7 @@ file(
547557
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu
548558
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/unary_grad_kernel.cu
549559
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu
560+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/transfer_layout_kernel.cc
550561
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/elementwise_grad_kernel.cu
551562
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/mask_kernel.cu
552563
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/ext_build_src_rank_and_local_expert_id_kernel.cu
@@ -595,8 +606,39 @@ file(
595606
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_swiglu_weighted_bwd_kernel.cu
596607
${PADDLE_SOURCE_DIR}/paddle/phi/core/flags.cc
597608
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math_function.cc
609+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_kernel.cu
610+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
598611
# ${PADDLE_SOURCE_DIR}/paddle/phi/backends/context_pool.cc
599612
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.cu
613+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/binomial_kernel.cu
614+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bernoulli_kernel.cu
615+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_grad_kernel_impl.h
616+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_kernel.cu
617+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/box_coder_kernel.cu
618+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_kernel.cu
619+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
620+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/channel_shuffle_grad_kernel.cu
621+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/channel_shuffle_kernel.cu
622+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/complex_grad_kernel.cu
623+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/complex_kernel.cu
624+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_maxmin_grad_kernel.cu
625+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu
626+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/digamma_kernel.cu
627+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/digamma_grad_kernel.cu
628+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dot_kernel.cu
629+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dot_grad_kernel.cu
630+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/eigh_grad_kernel.cu
631+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/eigvalsh_grad_kernel.cu
632+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/exponential_kernel.cu
633+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/flip_kernel.cu
634+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_grad_kernel.cu
635+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_tree_kernel.cu
636+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_reindex_kernel.cu
637+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_sample_neighbors_kernel.cu
638+
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_kernel.cu
639+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
640+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_grad_kernel.cu
641+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
600642
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_act_dequant_kernel.cu
601643
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/block_multi_head_attention_kernel.cu
602644
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_weighted_swiglu_act_quant_kernel.cu
@@ -627,7 +669,6 @@ file(
627669
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/reduce_kernel.cu
628670
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/reduce_max_kernel.cu
629671
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/array_kernel.cc
630-
${CMAKE_SOURCE_DIR}/kernels/funcs/blas/cublas.cc
631672
${CMAKE_SOURCE_DIR}/kernels/gpudnn/cudnn.cc
632673
${CMAKE_SOURCE_DIR}/kernels/metax_context.cc
633674
${CMAKE_SOURCE_DIR}/kernels/cross_entropy_kernel_register.cu
@@ -642,8 +683,6 @@ list(
642683
REMOVE_ITEM
643684
CUDA_SRCS
644685
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/gru_compute.cu
645-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/matrix_solve.cu
646-
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/matrix_inverse.cu
647686
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/multihead_matmul_functor.cu
648687
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/softmax.cu
649688
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/weight_only_gemv.cu
@@ -672,6 +711,7 @@ file(
672711
kernels/gpudnn/*.cu
673712
kernels/cuda_kernels/*.cc
674713
kernels/cuda_kernels/*.cu
714+
kernels/funcs/blas/*.cc
675715
kernels/ernie_core/*.cu
676716
kernels/ernie_core/rms_norm_kernel_register.cu
677717
kernels/ernie_core/top_p_sampling_kernel_register.cu

0 commit comments

Comments
 (0)