Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,19 @@ build-*/
python/build/
python/dist/
python/triton*.egg-info/
python/*.whl

python/triton/_C/*.pyd
python/triton/_C/*.so
python/triton/_C/*.dylib

benchmarks/dist
benchmarks/*.egg-info/
benchmarks/**/*.so

# Logs
inductor_log/

# Backends copied from submodules
python/triton/backends/
!python/triton/backends/__init__.py
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ if(NOT WIN32)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
endif()

# Two Python3 lookups are visible here: an optional one for the Interpreter
# component and a REQUIRED one for the Development.Module component.
# NOTE(review): these look like the removed and added sides of one diff hunk
# (the hunk is reported as "3 additions & 1 deletion") -- confirm that only the
# REQUIRED Development.Module lookup is meant to remain.
find_package(Python3 COMPONENTS Interpreter)
find_package(Python3 REQUIRED
COMPONENTS Development.Module)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need the interpreter for CMake (e.g. for calling Python scripts), but we do need to compile a Python library.
You can find out more here: https://cmake.org/cmake/help/latest/module/FindPython.html

find_package(Torch REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python PATH "${TORCH_INSTALL_PREFIX}/lib")
find_package(XeTLALibrary REQUIRED)

if(USE_IPEX)
string(APPEND CMAKE_CXX_FLAGS " -DUSE_IPEX")
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/cmake/FindXeTLALibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
include(FetchContent)

if (NOT XeTLALibrary_FOUND)
# TODO: switch to FetchContent_MakeAvailable once XeTLA supports it
cmake_policy(SET CMP0169 OLD)

set(XeTLALibrary_SOURCE_DIR
"${CMAKE_CURRENT_BINARY_DIR}/XeTLALibrary")
message(STATUS "XeTLALibrary is not specified. Will try to download
XeTLA library from https://github.com/intel/xetla into
${XeTLALibrary_SOURCE_DIR}")
file(READ xetla-library.conf XeTLALibrary_TAG)
file(READ xetla_kernel/xetla-library.conf XeTLALibrary_TAG)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've moved the library requirements to the top-level CMake file.

# Strip the potential trailing newline from tag
string(STRIP "${XeTLALibrary_TAG}" XeTLALibrary_TAG)
FetchContent_Declare(xetla-library
Expand Down
130 changes: 91 additions & 39 deletions benchmarks/setup.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,135 @@
import os
import re
import shutil
import subprocess
import sysconfig
import sys

from setuptools import setup
# TODO: update once there is replacement for clean:
# https://github.com/pypa/setuptools/discussions/2838
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read the discussion and it seems like it will never happen.

Should we just switch to pip install command instead of python setup.py install here:

?

The replacement is described here: https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html#summary

Moreover, the build env setup is usually up to the build front-end (pip or pypa/build, for example) and they often just create "throw-away" virtualenvs under /tmp that you wouldn't need to clean up

Given that pip creates a temporary folder and there is no need to clean it up, we can avoid using the deprecated API.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm personally using setup.py as my main tool. It is also challenging to clean up when you build without isolation in pip.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other words, until they come up with a better approach on cleaning, I would prefer to keep this deprecated API.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for now, but from everything I have read, they most likely can't provide such an API anymore and will instead delegate this task to other tools.

from distutils import log # pylint: disable=[deprecated-module]
from distutils.dir_util import remove_tree # pylint: disable=[deprecated-module]
from distutils.command.clean import clean as _clean # pylint: disable=[deprecated-module]

from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as _build_ext

import torch

ipex_cmake_prefix_path = ""
USE_IPEX_OPTION = os.getenv("USE_IPEX", "1")
if USE_IPEX_OPTION == "1":
import intel_extension_for_pytorch
ipex_cmake_prefix_path = f";{intel_extension_for_pytorch.cmake_prefix_path}"

# setuptools Extension that carries only a name: it is constructed with an
# empty `sources` list, so there is nothing for the stock build step to compile.
# NOTE(review): presumably the actual compilation is delegated to CMake -- confirm.
class CMakeExtension(Extension):

def __init__(self, name):
# don't invoke the original build_ext for this special extension
super().__init__(name, sources=[])


# Helper that assembles `cmake` command lines (configure, build, install) for the
# benchmark sources located next to this file and runs them as subprocesses.
#
# NOTE(review): this span appears to contain BOTH sides of a diff with the +/-
# markers and the indentation stripped (e.g. two `__init__` headers in a row, and
# lines using the module-level `ipex_cmake_prefix_path` / `USE_IPEX_OPTION` next
# to lines using `self.cmake_prefix_paths` / `self.use_ipex`). Confirm against the
# real file before relying on any statement order shown here.
class CMakeBuild():

def __init__(self):
# debug:   forwarded to get_build_type() to select the CMake build type.
# dry_run: stored on the instance; check_call() and clean() consult it.
def __init__(self, debug=False, dry_run=False):
# All paths are derived from the directory that contains this file.
self.current_dir = os.path.abspath(os.path.dirname(__file__))
self.build_temp = self.current_dir + "/build/temp"
self.extdir = self.current_dir + "/triton_kernels_benchmark"
self.build_type = self.get_build_type(debug)
# Starts with torch's CMake prefix path; check_ipex() may append another entry.
self.cmake_prefix_paths = [torch.utils.cmake_prefix_path]
self.use_ipex = False
self.dry_run = dry_run

# Returns "Debug" when `debug` is truthy or the DEBUG environment variable
# equals "1"; otherwise returns "Release".
def get_build_type(self, debug):
DEBUG_OPTION = os.getenv("DEBUG", "0")
return "Debug" if debug or (DEBUG_OPTION == "1") else "Release"

# Entry point. The visible lines include a `cmake --version` probe with a
# ">= 3.18" check as well as the check_ipex() + build_extension() sequence.
# NOTE(review): the version probe looks like the removed side of the diff -- confirm.
def run(self):
try:
out = subprocess.check_output(["cmake", "--version"])
except OSError as error:
raise RuntimeError("CMake must be installed") from error
self.check_ipex()
self.build_extension()

match = re.search(r"version\s*(?P<major>\d+)\.(?P<minor>\d+)([\d.]+)?", out.decode())
cmake_major, cmake_minor = int(match.group("major")), int(match.group("minor"))
if (cmake_major, cmake_minor) < (3, 18):
raise RuntimeError("CMake >= 3.18.0 is required")
# Decides whether to build with IPEX. USE_IPEX (environment, default "1") must
# equal "1" to enable it; if intel_extension_for_pytorch cannot be imported, a
# warning is logged and IPEX is disabled. On success its cmake_prefix_path is
# appended to self.cmake_prefix_paths.
def check_ipex(self):
self.use_ipex = os.getenv("USE_IPEX", "1") == "1"
if not self.use_ipex:
return
try:
import intel_extension_for_pytorch
except ImportError:
log.warn("ipex is not installed trying to build without ipex")
self.use_ipex = False
return
self.cmake_prefix_paths.append(intel_extension_for_pytorch.cmake_prefix_path)

self.build_extension()
# Logs the command (first positional argument, joined with spaces) and runs it
# through subprocess.check_call unless dry_run is set.
def check_call(self, *popenargs, **kwargs):
log.info(" ".join(popenargs[0]))
if not self.dry_run:
subprocess.check_call(*popenargs, **kwargs)

# Creates the build directory if missing, then issues the cmake configure,
# build and install commands.
def build_extension(self):
ninja_dir = shutil.which("ninja")
# create build directories
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
# python directories
python_include_dir = sysconfig.get_path("platinclude")
# NOTE(review): the list below repeats some options (e.g. CMAKE_PREFIX_PATH,
# USE_IPEX, CMAKE_VERBOSE_MAKEFILE) -- presumably old and new diff lines merged.
cmake_args = [
"-G",
"Ninja", # Ninja is much faster than make
"-DCMAKE_MAKE_PROGRAM=" +
ninja_dir, # Pass explicit path to ninja otherwise cmake may cache a temporary path
f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}{ipex_cmake_prefix_path}",
f"-DUSE_IPEX={USE_IPEX_OPTION}",
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
"-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=" + self.extdir,
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + self.extdir,
"-DPython3_EXECUTABLE:FILEPATH=" + sys.executable,
"-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON",
"-DPYTHON_INCLUDE_DIRS=" + python_include_dir,
"-DCMAKE_PREFIX_PATH=" + ";".join(self.cmake_prefix_paths),
f"-DUSE_IPEX={int(self.use_ipex)}",
"-DCMAKE_INSTALL_PREFIX=" + self.extdir,
"-DPython3_ROOT_DIR:FILEPATH=" + sys.exec_prefix,
"-DCMAKE_VERBOSE_MAKEFILE=TRUE",
"-DCMAKE_C_COMPILER=icx",
"-DCMAKE_CXX_COMPILER=icpx",
"-DCMAKE_BUILD_TYPE=" + self.build_type,
"-S",
self.current_dir,
"-B",
self.build_temp,
]

# configuration
build_type = "Debug"
build_args = ["--config", build_type]
cmake_args += ["-DCMAKE_BUILD_TYPE=" + build_type]
# Parallelism comes from MAX_JOBS, defaulting to twice the CPU count.
max_jobs = os.getenv("MAX_JOBS", str(2 * os.cpu_count()))
build_args += ["-j" + max_jobs]
build_args = [
"--build",
self.build_temp,
"-j" + max_jobs,
]

install_args = [
"--build",
self.build_temp,
"--target",
"install",
]

env = os.environ.copy()
cmake_dir = self.build_temp
subprocess.check_call(["cmake", self.current_dir] + cmake_args, cwd=cmake_dir, env=env)
subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=cmake_dir)
self.check_call(["cmake"] + cmake_args, env=env)
self.check_call(["cmake"] + build_args)
self.check_call(["cmake"] + install_args)

# Removes the build_temp directory (honouring dry_run) when it exists; otherwise
# logs a warning with the path shown relative to this file's directory.
def clean(self):
if os.path.exists(self.build_temp):
remove_tree(self.build_temp, dry_run=self.dry_run)
else:
log.warn("'%s' does not exist -- can't clean it", os.path.relpath(self.build_temp,
os.path.dirname(__file__)))


class build_ext(_build_ext):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name does not match the class naming style. CamelCase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a class that overrides a specific setuptools class, so it is kept that way to be consistent with the library.


# Runs the CMake-driven build first, passing along this command's `debug` and
# `dry_run` settings, and then the base class's run().
def run(self):
cmake = CMakeBuild(debug=self.debug, dry_run=self.dry_run)
cmake.run()
super().run()


# `clean` command override: asks CMakeBuild to clean up (passing along this
# command's `dry_run` setting) before running the base class's run().
class clean(_clean):

def run(self):
cmake = CMakeBuild(dry_run=self.dry_run)
cmake.clean()
super().run()

# NOTE(review): the two statements below run the CMake build at import time of
# this script; together with the first `package_data` line further down they look
# like the removed side of the diff (two `package_data` lines cannot both be
# part of one setup() call) -- confirm against the real file.
cmake = CMakeBuild()
cmake.run()

# Package metadata. The later `package_data` line matches
# "xetla_kernel.cpython-*.so" and registers the build_ext/clean command classes
# plus a CMakeExtension named "triton_kernels_benchmark".
setup(name="triton-kernels-benchmark", packages=[
"triton_kernels_benchmark",
], package_dir={
"triton_kernels_benchmark": "triton_kernels_benchmark",
}, package_data={"triton_kernels_benchmark": ["xetla_kernel.so"]})
}, package_data={"triton_kernels_benchmark": ["xetla_kernel.cpython-*.so"]}, cmdclass={
"build_ext": build_ext,
"clean": clean,
}, ext_modules=[CMakeExtension("triton_kernels_benchmark")])
10 changes: 3 additions & 7 deletions benchmarks/xetla_kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# XeTLA library is required.
find_package(XeTLALibrary REQUIRED)
set(CMAKE_CXX_STANDARD 20)

set(XETLA_KERNEL_FLAGS ${XETLA_KERNEL_FLAGS}
-fsycl
-fsycl-device-code-split=per_kernel
Expand Down Expand Up @@ -29,8 +25,7 @@ else()
set(XETLA_KERNEL_FLAGS ${XETLA_KERNEL_FLAGS} "${XETLA_OFFLINE_OPTIONS}")
endif()

# Defines the xetla_kernel target from python_main.cpp.
# NOTE(review): both `add_library(... SHARED ...)` + `PREFIX ""` and
# `Python3_add_library(... MODULE WITH_SOABI ...)` name the same target, so they
# cannot coexist; presumably the first two lines are the removed side of the
# diff -- confirm against the real file.
add_library(xetla_kernel SHARED python_main.cpp)
set_target_properties(xetla_kernel PROPERTIES PREFIX "")
Python3_add_library(xetla_kernel MODULE WITH_SOABI python_main.cpp)
target_compile_options(xetla_kernel PRIVATE "-fPIC")
if(USE_IPEX)
target_compile_options(xetla_kernel PRIVATE "-fsycl")
Expand All @@ -40,7 +35,6 @@ endif()
target_compile_options(xetla_kernel PUBLIC "-DXETPP_NEW_XMAIN")
target_link_options(xetla_kernel PRIVATE ${XETLA_KERNEL_FLAGS})
target_link_libraries(xetla_kernel PUBLIC ${TORCH_LIBRARIES} ${TORCH_PYTHON_LIBRARY})
target_include_directories(xetla_kernel PUBLIC "${PYTHON_INCLUDE_DIRS}")
target_include_directories(xetla_kernel PUBLIC "${XeTLALibrary_INCLUDE_DIR}")

if(USE_IPEX)
Expand All @@ -52,3 +46,5 @@ add_subdirectory(softmax)
add_subdirectory(gemm)
add_subdirectory(stream_k_gemm)
add_subdirectory(flash_attention)

install(TARGETS xetla_kernel LIBRARY DESTINATION .)
2 changes: 2 additions & 0 deletions benchmarks/xetla_kernel/flash_attention/fmha_forward_v5.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#ifndef TRITONBENCHMARK_FMHA_FWD_V5_H
#define TRITONBENCHMARK_FMHA_FWD_V5_H

#include <cmath>

#include "fmha_policy_v2.h"
#include "fmha_utils.h"
#include "xetla.hpp"
Expand Down