From 4cd019014cab8922dc7791f40f2961c9c294cb27 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 18 Oct 2024 12:13:07 +0000 Subject: [PATCH] Use the same driver for the upstream profiler as for the legacy profiler with IPEX Signed-off-by: Anatoly Myachev --- .../triton_kernels_benchmark/__init__.py | 4 +-- .../benchmark_driver.py | 35 ++++++++++++------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/benchmarks/triton_kernels_benchmark/__init__.py b/benchmarks/triton_kernels_benchmark/__init__.py index 7f765f586e..02857fdd99 100644 --- a/benchmarks/triton_kernels_benchmark/__init__.py +++ b/benchmarks/triton_kernels_benchmark/__init__.py @@ -1,6 +1,6 @@ -from .benchmark_testing import do_bench, assert_close, perf_report, Benchmark, USE_IPEX_OPTION # type: ignore # noqa: F401 +from .benchmark_testing import do_bench, assert_close, perf_report, Benchmark, USE_IPEX_OPTION, BENCHMARKING_METHOD # type: ignore # noqa: F401 -if USE_IPEX_OPTION: +if USE_IPEX_OPTION or BENCHMARKING_METHOD == "UPSTREAM_PYTORCH_PROFILER": from triton.runtime import driver from . import benchmark_driver # replace the launcher with the profilier hook. diff --git a/benchmarks/triton_kernels_benchmark/benchmark_driver.py b/benchmarks/triton_kernels_benchmark/benchmark_driver.py index aa1cbb41ae..2e1ef40fdf 100644 --- a/benchmarks/triton_kernels_benchmark/benchmark_driver.py +++ b/benchmarks/triton_kernels_benchmark/benchmark_driver.py @@ -10,15 +10,15 @@ from triton.runtime.build import _build, quiet import torch -import intel_extension_for_pytorch + +from .benchmark_testing import USE_IPEX_OPTION _dirname = os.getenv("ZE_PATH", default="/usr/local") include_dir = [ os.path.join(_dirname, "include"), os.path.join(torch.utils.cmake_prefix_path, "../../include"), - os.path.join(torch.utils.cmake_prefix_path, "../../include/torch/csrc/api/include"), - os.path.join(intel_extension_for_pytorch.cmake_prefix_path, "../../include") + os.path.join(torch.utils.cmake_prefix_path, "../../include/torch/csrc/api/include") ] oneapi_root = os.getenv("ONEAPI_ROOT") @@ -28,12 +28,15 @@ os.path.join(oneapi_root, "compiler/latest/include/sycl") ] -library_dir = [ - os.path.join(_dirname, "lib"), - os.path.join(torch.utils.cmake_prefix_path, "../../lib"), - os.path.join(intel_extension_for_pytorch.cmake_prefix_path, "../../lib") -] -libraries = ["ze_loader", "sycl", "torch", "intel-ext-pt-gpu"] +library_dir = [os.path.join(_dirname, "lib"), os.path.join(torch.utils.cmake_prefix_path, "../../lib")] +libraries = ["ze_loader", "sycl", "torch"] + +if USE_IPEX_OPTION: + import intel_extension_for_pytorch + + include_dir.append(os.path.join(intel_extension_for_pytorch.cmake_prefix_path, "../../include")) + library_dir.append(os.path.join(intel_extension_for_pytorch.cmake_prefix_path, "../../lib")) + libraries.append("intel-ext-pt-gpu") def compile_module_from_src(src, name): @@ -141,6 +144,14 @@ def format_of(ty): fmt = "iiiOOOOOO" + args_format args_list = ", " + ", ".join(f"&_arg{i}" for i, ty in signature.items()) if len(signature) > 0 else "" + record_function_header = "#include " + ipex_header = "" + xpu_profiler_record = "" + if USE_IPEX_OPTION: + record_function_header = "#include " + ipex_header = "#include " + xpu_profiler_record = "xpu::profiler_record(kernel_name, event);" + # generate glue code src = f""" #include @@ -149,8 +160,8 @@ def format_of(ty): #include #include #include - #include - #include + {record_function_header} + {ipex_header} #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include @@ -291,7 +302,7 @@ def format_of(ty): }} }}; auto event = stream.submit(cgf); - xpu::profiler_record(kernel_name, event); + {xpu_profiler_record} }} // end sycl static PyObject* launch(PyObject* self, PyObject* args) {{