From fcd8bcbdf55713b2373876efc2338873c1e3e043 Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 16:56:21 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E3=80=90CUDA=20Kernel=20No.8=E3=80=91fused?= =?UTF-8?q?=5Fseqpool=5Fcvm=E7=AE=97=E5=AD=90Kernel=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu | 2 +- .../kernels/fusion/fused_seqpool_cvm_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu index 040aadaddbd..88c3ad47461 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/mixed_vector.h" -#include "paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu" //NOLINT +#include "paddle/phi/kernels/fused_seqpool_cvm_kernel.h" PD_CUSTOM_KERNEL_REGISTER(fused_seqpool_cvm, iluvatar_gpu, diff --git a/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu index 2505a742a06..5b552b6298d 100644 --- a/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/mixed_vector.h" -#include "paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu" //NOLINT +#include "paddle/phi/kernels/fused_seqpool_cvm_kernel.h" PD_CUSTOM_KERNEL_REGISTER(fused_seqpool_cvm, metax_gpu, From 9af8449f711e9d22663531dbd119c3f15eb7fab9 Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 16:59:56 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E3=80=90CUDA=20Kernel=20No.9=E3=80=91fused?= =?UTF-8?q?=5Fsoftmax=5Fmask=5Fgrad=E7=AE=97=E5=AD=90Kernel=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cuda_kernels/fused_softmax_mask_grad_kernel_register.cu | 2 +- .../kernels/fusion/fused_softmax_mask_grad_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu index 0bb64afd52e..800a8c3f23d 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu" // NOLINT +#include "paddle/phi/kernels/fused_softmax_mask_grad_kernel.h" #include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_softmax_mask_grad, diff --git a/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu index 55883496a72..2c596c6a4eb 100644 --- a/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu" // NOLINT +#include "paddle/phi/kernels/fused_softmax_mask_grad_kernel.h" #include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_softmax_mask_grad, From f48b21b6a48f3c23bd8558308fff00fc99a50d52 Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 17:02:32 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E3=80=90CUDA=20Kernel=20No.13=E3=80=91fuse?= =?UTF-8?q?d=5Ftranspose=5Fsplit=5Fquant=E7=AE=97=E5=AD=90Kernel=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cuda_kernels/fused_transpose_split_quant_kernel_register.cu | 2 +- .../fusion/fused_transpose_split_quant_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu index 8dc51bbc279..fc2c31363bf 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.cu" //NOLINT +#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" #include "paddle/phi/kernels/fusion/gpu/quant_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_transpose_split_quant, diff --git a/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu index 6dc3b27f2c2..3826f437718 100644 --- a/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.cu" //NOLINT +#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" #include "paddle/phi/kernels/fusion/gpu/quant_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_transpose_split_quant, From 06dd4eb8aa7d8a6257f47e03d3e4fc7a14d4b62c Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 17:12:06 +0800 Subject: [PATCH 4/6] =?UTF-8?q?Revert=20"=E3=80=90CUDA=20Kernel=20No.8?= =?UTF-8?q?=E3=80=91fused=5Fseqpool=5Fcvm=E7=AE=97=E5=AD=90Kernel=E4=BF=AE?= =?UTF-8?q?=E5=A4=8D"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit fcd8bcbdf55713b2373876efc2338873c1e3e043. --- .../kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu | 2 +- .../kernels/fusion/fused_seqpool_cvm_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu index 88c3ad47461..040aadaddbd 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_seqpool_cvm_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/mixed_vector.h" -#include "paddle/phi/kernels/fused_seqpool_cvm_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu" //NOLINT PD_CUSTOM_KERNEL_REGISTER(fused_seqpool_cvm, iluvatar_gpu, diff --git a/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu index 5b552b6298d..2505a742a06 100644 --- a/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_seqpool_cvm_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/mixed_vector.h" -#include "paddle/phi/kernels/fused_seqpool_cvm_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu" //NOLINT PD_CUSTOM_KERNEL_REGISTER(fused_seqpool_cvm, metax_gpu, From 28f785de449838bcbd9a2e269dfe85636b120cac Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 17:12:17 +0800 Subject: [PATCH 5/6] =?UTF-8?q?Revert=20"=E3=80=90CUDA=20Kernel=20No.9?= =?UTF-8?q?=E3=80=91fused=5Fsoftmax=5Fmask=5Fgrad=E7=AE=97=E5=AD=90Kernel?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9af8449f711e9d22663531dbd119c3f15eb7fab9. --- .../cuda_kernels/fused_softmax_mask_grad_kernel_register.cu | 2 +- .../kernels/fusion/fused_softmax_mask_grad_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu index 800a8c3f23d..0bb64afd52e 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_softmax_mask_grad_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/fused_softmax_mask_grad_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu" // NOLINT #include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_softmax_mask_grad, diff --git a/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu index 2c596c6a4eb..55883496a72 100644 --- a/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_softmax_mask_grad_kernel_register.cu @@ -14,7 +14,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" -#include "paddle/phi/kernels/fused_softmax_mask_grad_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu" // NOLINT #include "paddle/phi/kernels/fusion/gpu/fused_softmax_mask_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_softmax_mask_grad, From 69dbc869649a55880b627d634913998d77e9d3f6 Mon Sep 17 00:00:00 2001 From: wangjiabao <204268140@qq.com> Date: Sat, 27 Sep 2025 20:15:37 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E5=AF=B9feature13=E9=87=8D=E6=96=B0?= =?UTF-8?q?=E8=B7=91=E4=B8=80=E9=81=8Dpre-commit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cuda_kernels/fused_transpose_split_quant_kernel_register.cu | 2 +- .../fusion/fused_transpose_split_quant_kernel_register.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu index fc2c31363bf..3e829e02d0b 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/fused_transpose_split_quant_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" #include "paddle/phi/kernels/fusion/gpu/quant_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_transpose_split_quant, diff --git a/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu b/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu index 3826f437718..d5ec219b6cd 100644 --- a/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu +++ b/backends/metax_gpu/kernels/fusion/fused_transpose_split_quant_kernel_register.cu @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" +#include "paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.h" #include "paddle/phi/kernels/fusion/gpu/quant_utils.h" PD_CUSTOM_KERNEL_REGISTER(fused_transpose_split_quant,