intel
diff --git a/xla/service/gpu/onednn_matmul_utils.cc b/xla/service/gpu/onednn_matmul_utils.cc
Lines changed: 6 additions & 6 deletions
diff --git a/xla/service/gpu/xetla/gemm/BUILD b/xla/service/gpu/xetla/gemm/BUILD
Lines changed: 77 additions & 3 deletions
diff --git a/xla/service/gpu/xetla/gemm/dispatch_col_major.cc b/xla/service/gpu/xetla/gemm/dispatch_col_major.cc
Lines changed: 49 additions & 0 deletions
diff --git a/xla/service/gpu/xetla/gemm/dispatch_col_major.h b/xla/service/gpu/xetla/gemm/dispatch_col_major.h
Lines changed: 49 additions & 0 deletions
diff --git a/xla/service/gpu/xetla/gemm/dispatch_row_major.cc b/xla/service/gpu/xetla/gemm/dispatch_row_major.cc
Lines changed: 49 additions & 0 deletions
diff --git a/xla/service/gpu/xetla/gemm/dispatch_row_major.h b/xla/service/gpu/xetla/gemm/dispatch_row_major.h
Lines changed: 49 additions & 0 deletions
@@ -264,7 +264,7 @@ RunXetlaGemm(se::gpu::GpuStreamHandle handle, const MatrixDescriptor& lhs,
           policy
               .add_epilogue(
                   c_data,
-                  ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::RES_ADD)
+                  ::gpu::xetla::EpilogueType::RES_ADD)
               .build();
         } else {
           return true;
@@ -283,13 +283,13 @@ RunXetlaGemm(se::gpu::GpuStreamHandle handle, const MatrixDescriptor& lhs,
               .add_matrix_b(rhs)
               .add_epilogue(
                   bias_data,
-                  ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::BIAS)
+                  ::gpu::xetla::EpilogueType::BIAS)
               .build();
       if (fabs(beta) - 0.0f > 1e-6) {
         policy
             .add_epilogue(
                 c_data,
-                ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::RES_ADD,
+                ::gpu::xetla::EpilogueType::RES_ADD,
                 beta)
             .build();
       }
@@ -306,7 +306,7 @@ RunXetlaGemm(se::gpu::GpuStreamHandle handle, const MatrixDescriptor& lhs,
               .add_matrix_b(rhs)
               .add_epilogue(
                   nullptr,
-                  ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::GELU)
+                  ::gpu::xetla::EpilogueType::GELU)
               .build();
       if (policy.fallback() == false) {
         return !policy.run(handle);
@@ -321,10 +321,10 @@ RunXetlaGemm(se::gpu::GpuStreamHandle handle, const MatrixDescriptor& lhs,
               .add_matrix_b(rhs)
               .add_epilogue(
                   bias_data,
-                  ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::BIAS)
+                  ::gpu::xetla::EpilogueType::BIAS)
               .add_epilogue(
                   nullptr,
-                  ::gpu::xetla::XetlaGemmKernel<InputT>::EpilogueType::GELU)
+                  ::gpu::xetla::EpilogueType::GELU)
               .build();
       if (policy.fallback() == false) {
         return !policy.run(handle);
 
@@ -1,22 +1,96 @@
 load("//xla:xla.bzl", "xetla_library")
 
-# List all kernels here.
+# Header-only target for gemm_common.h. Its sole dependency is
+# //xla/service/gpu:matrix_descriptor.
+xetla_library(
+    name = "gemm_common",
+    hdrs = ["gemm_common.h"],
+    copts = [
+        "-Wall",
+        "-Wno-c++11-narrowing",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//xla/service/gpu:matrix_descriptor",
+    ],
+)
+
+# Header-only target (hdrs, no srcs). hgemm_impl.h and epilogue_impl.h are
+# listed here now that :gemm_kernel no longer exports them; :gemm_kernel
+# reaches this target through the dispatch_* libraries.
+xetla_library(
+    name = "gemm_dispatch",
+    hdrs = [
+        "epilogue_impl.h",
+        "gemm_dispatch.h",
+        "hgemm_impl.h",
+    ],
+    copts = ["-Wall", "-Wno-c++11-narrowing"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":gemm_common",
+        "//xla/service/gpu:matrix_descriptor",
+        "//xla/stream_executor/sycl:sycl_executor",
+        "@xetla//:xetla_header",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+# Builds dispatch_row_major.cc, which explicitly instantiates
+# GemmRowMajorDispatcher for sycl::half and gpu::xetla::bf16. The project
+# headers that file includes are supplied by :gemm_dispatch (gemm_dispatch.h
+# directly, gemm_common.h via its :gemm_common dependency).
+xetla_library(
+    name = "dispatch_row_major",
+    srcs = ["dispatch_row_major.cc"],
+    hdrs = ["dispatch_row_major.h"],
+    copts = [
+        "-Wall",
+        "-Wno-c++11-narrowing",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":gemm_dispatch",
+        "//xla/stream_executor/sycl:sycl_executor",
+    ],
+)
+
+# Builds dispatch_col_major.cc, which explicitly instantiates
+# GemmColMajorDispatcher for sycl::half and gpu::xetla::bf16. The project
+# headers that file includes are supplied by :gemm_dispatch (gemm_dispatch.h
+# directly, gemm_common.h via its :gemm_common dependency).
+xetla_library(
+    name = "dispatch_col_major",
+    srcs = ["dispatch_col_major.cc"],
+    hdrs = ["dispatch_col_major.h"],
+    copts = [
+        "-Wall",
+        "-Wno-c++11-narrowing",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":gemm_dispatch",
+        "//xla/stream_executor/sycl:sycl_executor",
+    ],
+)
+
 xetla_library(
     name = "gemm_kernel",
     srcs = [
         "gemm.cc",
     ],
     hdrs = [
         "gemm.h",
-        "hgemm_impl.h",
-        "epilogue_impl.h",
     ],
     copts = [
         "-Wall",
         "-Wno-c++11-narrowing",
     ],
     visibility = ["//visibility:public"],
     deps = [
+        ":gemm_common",
+        ":dispatch_row_major",
+        ":dispatch_col_major",
         "//xla/service/gpu:matrix_descriptor",
         "//xla/stream_executor/sycl:sycl_executor",
         "@xetla//:xetla_header",
 
@@ -0,0 +1,49 @@
+/* Copyright (c) 2024 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/service/gpu/xetla/gemm/dispatch_col_major.h"
+
+#include "xla/service/gpu/xetla/gemm/gemm_common.h"
+#include "xla/service/gpu/xetla/gemm/gemm_dispatch.h"
+#include "xla/stream_executor/gpu/gpu_types.h"
+
+namespace gpu {
+namespace xetla {
+
+// Runtime entry point: copies the six integers out of `selected_policy_id_`
+// and passes them, this dispatcher and `handle` to gemm_policy::call.
+template <typename ComputeType>
+bool GemmColMajorDispatcher<ComputeType>::run(se::gpu::GpuStreamHandle handle) {
+  auto [wg_m, wg_n, sg_m, sg_n, sg_k, slm_ks] = selected_policy_id_;
+  return gemm_policy<ComputeType>::call(wg_m, wg_n, sg_m, sg_n, sg_k, slm_ks,
+                                        this, handle);
+}
+
+// Compile-time entry point: hands its six integer template arguments to
+// do_dispatch with the trailing template flag fixed to `false`.
+template <typename ComputeType>
+template <int WG_M, int WG_N, int SG_M, int SG_N, int SG_K, int SLM_KS>
+bool GemmColMajorDispatcher<ComputeType>::dispatch(
+    se::gpu::GpuStreamHandle handle) {
+  return do_dispatch<ComputeType, WG_M, WG_N, SG_M, SG_N, SG_K, SLM_KS, false>(
+      handle, params_);
+}
+
+// Definitions live in this .cc, so each element type is instantiated here.
+template class GemmColMajorDispatcher<sycl::half>;
+template class GemmColMajorDispatcher<gpu::xetla::bf16>;
+
+}  // namespace xetla
+}  // namespace gpu
@@ -0,0 +1,49 @@
+/* Copyright (c) 2024 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_COL_MAJOR_H_
+#define XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_COL_MAJOR_H_
+
+#include "xla/service/gpu/xetla/gemm/gemm_common.h"
+#include "xla/service/gpu/xetla/gemm/gemm_dispatch.h"
+#include "xla/stream_executor/gpu/gpu_types.h"
+
+namespace gpu {
+namespace xetla {
+
+// Column-major GEMM dispatcher: pairs a parameter block with a policy id.
+template <typename ComputeType>
+class GemmColMajorDispatcher {
+ public:
+  GemmColMajorDispatcher() = default;
+  GemmColMajorDispatcher(
+      DispatchParams* params,
+      std::tuple<int, int, int, int, int, int> selected_policy_id)
+      : params_(params), selected_policy_id_(selected_policy_id) {}
+  // Dispatches for one compile-time configuration; defined in the .cc file.
+  template <int WG_M, int WG_N, int SG_M, int SG_N, int SG_K, int SLM_KS>
+  bool dispatch(se::gpu::GpuStreamHandle handle);
+  // Dispatches for the policy id captured by the constructor.
+  bool run(se::gpu::GpuStreamHandle handle);
+
+ private:
+  DispatchParams* params_ = nullptr;  // not owned; null by default
+  std::tuple<int, int, int, int, int, int> selected_policy_id_;
+};
+
+}  // namespace xetla
+}  // namespace gpu
+
+#endif  // XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_COL_MAJOR_H_
@@ -0,0 +1,49 @@
+/* Copyright (c) 2024 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "xla/service/gpu/xetla/gemm/dispatch_row_major.h"
+
+#include "xla/service/gpu/xetla/gemm/gemm_common.h"
+#include "xla/service/gpu/xetla/gemm/gemm_dispatch.h"
+#include "xla/stream_executor/gpu/gpu_types.h"
+
+namespace gpu {
+namespace xetla {
+
+// Runtime entry point: copies the six integers out of `selected_policy_id_`
+// and passes them, this dispatcher and `handle` to gemm_policy::call.
+template <typename ComputeType>
+bool GemmRowMajorDispatcher<ComputeType>::run(se::gpu::GpuStreamHandle handle) {
+  auto [wg_m, wg_n, sg_m, sg_n, sg_k, slm_ks] = selected_policy_id_;
+  return gemm_policy<ComputeType>::call(wg_m, wg_n, sg_m, sg_n, sg_k, slm_ks,
+                                        this, handle);
+}
+
+// Compile-time entry point: hands its six integer template arguments to
+// do_dispatch with the trailing template flag fixed to `true`.
+template <typename ComputeType>
+template <int WG_M, int WG_N, int SG_M, int SG_N, int SG_K, int SLM_KS>
+bool GemmRowMajorDispatcher<ComputeType>::dispatch(
+    se::gpu::GpuStreamHandle handle) {
+  return do_dispatch<ComputeType, WG_M, WG_N, SG_M, SG_N, SG_K, SLM_KS, true>(
+      handle, params_);
+}
+
+// Definitions live in this .cc, so each element type is instantiated here.
+template class GemmRowMajorDispatcher<sycl::half>;
+template class GemmRowMajorDispatcher<gpu::xetla::bf16>;
+
+}  // namespace xetla
+}  // namespace gpu
@@ -0,0 +1,49 @@
+/* Copyright (c) 2024 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_ROW_MAJOR_H_
+#define XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_ROW_MAJOR_H_
+
+#include "xla/service/gpu/xetla/gemm/gemm_common.h"
+#include "xla/service/gpu/xetla/gemm/gemm_dispatch.h"
+#include "xla/stream_executor/gpu/gpu_types.h"
+
+namespace gpu {
+namespace xetla {
+
+// Row-major GEMM dispatcher: pairs a parameter block with a policy id.
+template <typename ComputeType>
+class GemmRowMajorDispatcher {
+ public:
+  GemmRowMajorDispatcher() = default;
+  GemmRowMajorDispatcher(
+      DispatchParams* params,
+      std::tuple<int, int, int, int, int, int> selected_policy_id)
+      : params_(params), selected_policy_id_(selected_policy_id) {}
+  // Dispatches for one compile-time configuration; defined in the .cc file.
+  template <int WG_M, int WG_N, int SG_M, int SG_N, int SG_K, int SLM_KS>
+  bool dispatch(se::gpu::GpuStreamHandle handle);
+  // Dispatches for the policy id captured by the constructor.
+  bool run(se::gpu::GpuStreamHandle handle);
+
+ private:
+  DispatchParams* params_ = nullptr;  // not owned; null by default
+  std::tuple<int, int, int, int, int, int> selected_policy_id_;
+};
+
+}  // namespace xetla
+}  // namespace gpu
+
+#endif  // XLA_SERVICE_GPU_XETLA_GEMM_DISPATCH_ROW_MAJOR_H_