Skip to content

Commit 79bfb18

Browse files
authored
multihead_matmul op: support codegen and move the kernel to phi (#56846)
1 parent 7fd6ffb commit 79bfb18

File tree

13 files changed

+1035
-1026
lines changed

13 files changed

+1035
-1026
lines changed

paddle/fluid/inference/tensorrt/plugin/multihead_matmul_roformer_plugin.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
#include "paddle/fluid/framework/tensor_util.h"
2323
#include "paddle/fluid/inference/tensorrt/plugin/common/common.cuh"
2424
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h"
25-
#include "paddle/fluid/operators/math/bert_encoder_functor.h"
2625
#include "paddle/fluid/platform/device_context.h"
2726
#include "paddle/phi/kernels/funcs/blas/blas.h"
27+
#include "paddle/phi/kernels/funcs/multihead_matmul_functor.h"
2828

2929
namespace paddle {
3030
namespace inference {
@@ -254,7 +254,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
254254
platform::CUDAPlace(device_id)));
255255

256256
const phi::GPUContext &dev_ctx = *device_ctx;
257-
operators::math::MultiHeadGPUComputeFunctor<float> multihead_compute_func;
257+
phi::funcs::MultiheadGPUComputeFunctor<float> multihead_compute_func;
258258
multihead_compute_func(dev_ctx,
259259
batch,
260260
seq_len,
@@ -341,7 +341,7 @@ int MultiheadMatmulRoformerPlugin::enqueue(
341341
tptr, static_cast<half>(scale_), n_q);
342342

343343
const phi::GPUContext &dev_ctx = *device_ctx;
344-
operators::math::MultiHeadGPUComputeFunctor<half> multihead_compute_func;
344+
phi::funcs::MultiheadGPUComputeFunctor<half> multihead_compute_func;
345345
multihead_compute_func(dev_ctx,
346346
batch,
347347
seq_len,

paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
#include "paddle/fluid/inference/tensorrt/plugin/common/common.cuh"
2525
#include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h"
2626
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h"
27-
#include "paddle/fluid/operators/math/bert_encoder_functor.h"
2827
#include "paddle/fluid/platform/device_context.h"
2928
#include "paddle/phi/kernels/funcs/blas/blas.h"
29+
#include "paddle/phi/kernels/funcs/multihead_matmul_functor.h"
3030

3131
namespace paddle {
3232
namespace inference {
@@ -396,7 +396,7 @@ int QkvToContextPluginDynamic::enqueue(
396396
platform::CUDAPlace(device_id)));
397397

398398
const phi::GPUContext &dev_ctx = *device_ctx;
399-
operators::math::MultiHeadGPUComputeFunctor<float> multihead_compute_func;
399+
phi::funcs::MultiheadGPUComputeFunctor<float> multihead_compute_func;
400400
multihead_compute_func(dev_ctx,
401401
batch,
402402
seq_len,
@@ -506,7 +506,7 @@ int QkvToContextPluginDynamic::enqueue(
506506
tptr, static_cast<half>(scale_), n_q);
507507

508508
const phi::GPUContext &dev_ctx = *device_ctx;
509-
operators::math::MultiHeadGPUComputeFunctor<half> multihead_compute_func;
509+
phi::funcs::MultiheadGPUComputeFunctor<half> multihead_compute_func;
510510
multihead_compute_func(dev_ctx,
511511
batch,
512512
seq_len,

paddle/fluid/operators/fused/CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ register_operators(
1010
fusion_transpose_flatten_concat_op
1111
fusion_conv_inception_op
1212
fused_fc_elementwise_layernorm_op
13-
multihead_matmul_op
1413
self_dp_attention_op
1514
skip_layernorm_op
1615
yolo_box_head_op
@@ -74,8 +73,6 @@ if(WITH_GPU OR WITH_ROCM)
7473
endif()
7574
# fused_fc_elementwise_layernorm_op
7675
op_library(fused_fc_elementwise_layernorm_op)
77-
# multihead_matmul_op
78-
op_library(multihead_matmul_op)
7976
op_library(skip_layernorm_op)
8077
op_library(yolo_box_head_op)
8178
op_library(yolo_box_post_op)

paddle/fluid/operators/fused/multihead_matmul_op.cc

Lines changed: 0 additions & 116 deletions
This file was deleted.

0 commit comments

Comments
 (0)