Skip to content

Commit 35c6d15

Browse files
authored
Python-generated schedules (just mirroring pipeline bundles) (#997)
Invoke either as `build/bin/tpp-sched` or `python -m mlir.tpp.sched` (which requires `PYTHONPATH` to point to `build/python_packages` - the `tools/tpp-sched/tpp-sched.py` script (installed as `tpp-sched`) takes care of that). When `tpp-sched` is invoked it just dumps the default pipeline schedule to stdout. When a path is provided as a positional argument, i.e. `tpp-sched.py PAYLOAD_FILE`, the schedule gets generated and immediately applied to the payload and only the transformed payload gets dumped to stdout. Use `--bundles=BUNDLE1,...,BUNDLEN` to control which bundles make up the schedule. All the bundles' arguments available on `tpp-opt` are available on `tpp-sched`. When invoked as `tpp-sched --payload PAYLOAD_FILE`, it will just print the payload and schedule together as one IR dump. This is meant to facilitate working directly with the IR before passing it on to, e.g., `tpp-opt` (to apply the schedule to the payload use `build/bin/tpp-sched --payload PAYLOAD_FILE | build/bin/tpp-opt --load-tpp-dialects --transform-interpreter` -- as to why we need `--load-tpp-dialects`, see [here](https://discourse.llvm.org/t/dynamic-pass-scheduling-and-dialect-loading/85166/2)). Introduces Python bindings for our own dialects, for now mostly to be able to register them from Python before running passes that depend on them. Requires MLIR to be built with `-DMLIR_ENABLE_BINDINGS_PYTHON=1`. Adds RUN lines which use `tpp-sched` to each test which used `tpp-opt` to apply a pipeline bundle. For now we only have partial support for the GPU pipeline bundles.
1 parent bcc13ba commit 35c6d15

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+982
-17
lines changed

CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,28 @@ endif()
4343
set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})
4444
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
4545

46+
# Nanobind for Python bindings
47+
if (CMAKE_VERSION VERSION_LESS 3.18)
48+
set(PYTHON_DEV_MODULE Development)
49+
else()
50+
set(PYTHON_DEV_MODULE Development.Module)
51+
endif()
52+
53+
find_package(Python 3.8 COMPONENTS Interpreter ${PYTHON_DEV_MODULE} REQUIRED)
54+
55+
# Nanobind docs say "Without this addition, binding code may run slowly and produce large binaries.":
56+
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
57+
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
58+
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
59+
endif()
60+
61+
# Detect the pip/conda-installed nanobind package and import it into CMake
62+
execute_process(
63+
COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir
64+
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) # NOTE(review): the command's exit status is not captured (no RESULT_VARIABLE), so a failure here is presumably only reported by the REQUIRED find_package(nanobind) that follows -- confirm this is intended.
65+
find_package(nanobind CONFIG REQUIRED)
66+
message (STATUS "Nanobind found")
67+
4668
set(TPP_MAIN_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include")
4769
set(TPP_GEN_INCLUDE_DIR "${PROJECT_BINARY_DIR}/include")
4870

@@ -71,6 +93,7 @@ include(sanitizers)
7193
# Sub directories
7294
add_subdirectory(include)
7395
add_subdirectory(lib)
96+
add_subdirectory(python)
7497
add_subdirectory(runtime)
7598
add_subdirectory(tools)
7699
add_subdirectory(test)

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,11 @@ cmake -G Ninja ../llvm \
5757
-DLLVM_ENABLE_ASSERTIONS=ON \
5858
-DCMAKE_C_COMPILER=clang \
5959
-DCMAKE_CXX_COMPILER=clang++ \
60+
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
6061
-DLLVM_USE_LINKER=lld
6162

6263
# Build
63-
ninja
64+
ninja
6465

6566
popd
6667
```
@@ -97,7 +98,7 @@ popd
9798

9899
To enable experimental GPU support see: [GPU/README.md](lib/TPP/GPU/README.md)
99100

100-
In the example above, we are building both LLVM/MLIR and tpp-mlir in relese mode. You can easily change the build type by adopting the `-DCMAKE_BUILD_TYPE` option, e.g. `=DCMAKE_BUILD_TYPE=RelWithDebInfo`.
101+
In the example above, we are building both LLVM/MLIR and tpp-mlir in release mode. You can easily change the build type by setting the `-DCMAKE_BUILD_TYPE` option, e.g. `-DCMAKE_BUILD_TYPE=RelWithDebInfo`.
101102

102103
### Conda Environment
103104

include/TPP-CAPI/Dialects.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef TPP_CAPI_DIALECTS_H
2+
#define TPP_CAPI_DIALECTS_H
3+
4+
#include "mlir-c/IR.h"
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Check, check);
11+
MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Xsmm, xsmm);
12+
MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(Perf, perf);
13+
14+
#ifdef __cplusplus
15+
}
16+
#endif
17+
18+
#endif // TPP_CAPI_DIALECTS_H

include/TPP/Passes.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ namespace arith {
2222
class ArithDialect;
2323
} // namespace arith
2424

25+
namespace async {
26+
class AsyncDialect;
27+
} // namespace async
28+
2529
namespace check {
2630
class CheckDialect;
2731
} // namespace check
@@ -56,6 +60,10 @@ namespace memref {
5660
class MemRefDialect;
5761
} // namespace memref
5862

63+
namespace omp {
64+
class OpenMPDialect;
65+
} // namespace omp
66+
5967
namespace perf {
6068
class PerfDialect;
6169
} // namespace perf

include/TPP/Passes.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,23 @@
1111

1212
include "mlir/Pass/PassBase.td"
1313

14+
def LoadTppDialects : Pass<"load-tpp-dialects", "ModuleOp"> {
15+
let summary = "Pre-load all TPP-specific dialects";
16+
let description = [{
17+
Pre-load dialects that -transform-interpreter would try to load at runtime.
18+
19+
The issue is that -transform-interpreter runs inside the multi-threaded
20+
passmanager. Hence when the interpreter dynamically tries to load dependent
21+
dialects this triggers an assert as loading during multi-threaded execution
22+
could lead to concurrency issues.
23+
}];
24+
let dependentDialects = ["xsmm::XsmmDialect", // The dialects this pass exists to pre-load (see the description above).
25+
"check::CheckDialect",
26+
"perf::PerfDialect",
27+
"omp::OpenMPDialect",
28+
"async::AsyncDialect"];
29+
}
30+
1431
def ConvertLinalgToXsmm : Pass<"convert-linalg-to-xsmm", "func::FuncOp"> {
1532
let summary = "Convert linalg to xsmm";
1633
let description = [{

lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
22

33
add_subdirectory(TPP)
4+
add_subdirectory(TPP-CAPI)

lib/TPP-CAPI/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
add_mlir_public_c_api_library(TPPCAPI
2+
Dialects.cpp
3+
LINK_LIBS PUBLIC
4+
TPPCheckDialect
5+
TPPPerfDialect
6+
TPPXsmmDialect
7+
TPPTransforms
8+
TPPPipeline
9+
tpp_xsmm_runner_utils
10+
)

lib/TPP-CAPI/Dialects.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#include "mlir/CAPI/Registration.h"
2+
3+
#include "TPP-CAPI/Dialects.h"
4+
#include "TPP/Dialect/Check/CheckDialect.h"
5+
#include "TPP/Dialect/Perf/PerfDialect.h"
6+
#include "TPP/Dialect/Xsmm/XsmmDialect.h"
7+
8+
MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Check, check, mlir::check::CheckDialect)
9+
10+
MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Xsmm, xsmm, mlir::xsmm::XsmmDialect)
11+
12+
MLIR_DEFINE_CAPI_DIALECT_REGISTRATION(Perf, perf, mlir::perf::PerfDialect)

lib/TPP/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
1212
add_mlir_library(TPPPipeline
1313
DefaultPipeline.cpp
1414
DefaultTppPasses.cpp
15+
LoadTppDialects.cpp
1516

1617
ADDITIONAL_HEADER_DIRS
1718
${PROJECT_SOURCE_DIR}/include/TPP

lib/TPP/LoadTppDialects.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===- LoadTppDialects.cpp -----------------------------------------*- C++-*-===//
2+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
//===----------------------------------------------------------------------===//
7+
//
8+
// Pass is a no-op as it is only used for the side-effect of loading dialects.
9+
//
10+
//===----------------------------------------------------------------------===//
11+
#include "mlir/Pass/Pass.h"
12+
#include "mlir/IR/BuiltinOps.h"
13+
#include "TPP/Dialect/Check/CheckDialect.h"
14+
#include "TPP/Dialect/Perf/PerfDialect.h"
15+
#include "TPP/Dialect/Xsmm/XsmmDialect.h"
16+
#include "mlir/Dialect/Async/IR/Async.h"
17+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
18+
19+
20+
namespace mlir {
21+
namespace tpp {
22+
#define GEN_PASS_DEF_LOADTPPDIALECTS
23+
#include "TPP/Passes.h.inc"
24+
} // namespace tpp
25+
} // namespace mlir
26+
27+
using namespace mlir;
28+
using namespace std;
29+
30+
namespace mlir {
31+
namespace tpp {
32+
struct LoadTppDialects // No-op pass: it is only used for the side effect of loading dialects.
33+
: public impl::LoadTppDialectsBase<LoadTppDialects> {
34+
void runOnOperation() override {} // Intentionally empty: the pass does not touch the operation it runs on.
35+
};
36+
} // namespace tpp
37+
} // namespace mlir
38+

0 commit comments

Comments
 (0)