1 change: 1 addition & 0 deletions test/benchgc/CMakeLists.txt
@@ -41,3 +41,4 @@ add_subdirectory("src/benchgc/tensor")
add_subdirectory("src/benchgc/arith")
add_subdirectory("src/benchgc/pattern")
add_subdirectory("src/benchgc/math")
add_subdirectory("src/benchgc/tuner")
3 changes: 3 additions & 0 deletions test/benchgc/README.md
@@ -138,12 +138,15 @@ module {
### --bench_kind [str]
* py : use the MLIR Python API to invoke the kernel and measure the time cost in Python
* wrapper : modify the MLIR by wrapping the kernel in a new function that calls `nanoTime()` before and after the kernel call, then report the difference as the time cost
* default: `py`

### --warm_up [int]
* number of warm-up executions before timing
* default: 100 in mode `P`, 2 in mode `T`

### --repeat [int]
* number of timed executions (see the example run below)
* default: 100 in mode `P`, 4 in mode `T`
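For illustration, a performance run with these options could be launched from Python roughly as follows. This is a hedged sketch: it assumes the `benchgc` package is importable (so `python -m benchgc` resolves), and the driver/case flags for the kernel under test are placeholders.

```python
import subprocess

# Hypothetical run: performance mode with wrapper-based timing,
# 50 warm-up executions and 200 timed executions.
subprocess.run(
    [
        "python", "-m", "benchgc",
        "--mode", "P",
        "--bench_kind", "wrapper",
        "--warm_up", "50",
        "--repeat", "200",
        # append the driver/case options for the kernel under test here
    ],
    check=True,
)
```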

## Pattern Options
Each pattern has its own unique options.
124 changes: 118 additions & 6 deletions test/benchgc/src/benchgc/__main__.py
@@ -31,9 +31,15 @@
set_default_fill,
)
from benchgc.arg.arg import Arg
from benchgc.bench import mlir_wrapper_bench, py_timeit_bench
from benchgc.bench import (
batch_mlir_wrapper_bench,
batch_py_timeit_bench,
mlir_wrapper_bench,
py_timeit_bench,
)
from benchgc.mlir.arg import get_mlir_args
from benchgc.pattern import get_pattern_clz
from benchgc.tuner.tuner import GATuner, GridTuner, Tuner, TuningSpace
from gc_mlir import ir
from gc_mlir.graph_compiler import GraphCompiler

@@ -44,7 +50,7 @@ def add_common_options(parser: argparse.ArgumentParser):
"--mode",
required=False,
help="specify the test mode, C for correctness testing, P for performance testing",
choices=["C", "P"],
choices=["C", "P", "T"],
default="C",
type=str,
)
@@ -198,13 +204,20 @@

def add_bench_options(parser: argparse.ArgumentParser):
"""add options for bench mode"""
if parser.parse_known_args()[0].mode == "P":
if parser.parse_known_args()[0].mode in ("P", "T"):
parser.add_argument(
"--bench_kind", type=str, choices=["py", "wrapper"], default="py"
)
parser.add_argument("--warm_up", type=int, default=100)
parser.add_argument("--repeat", type=int, default=100)

parser.add_argument(
"--warm_up",
type=int,
default=100 if parser.parse_known_args()[0].mode == "P" else 2,
)
parser.add_argument(
"--repeat",
type=int,
default=100 if parser.parse_known_args()[0].mode == "P" else 4,
)
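These mode-dependent defaults work because `argparse`'s `parse_known_args` tolerates options that have not been registered yet, so `--mode` can be inspected before `--warm_up` and `--repeat` exist. A minimal standalone sketch of the same two-pass pattern (illustrative only, not part of this PR):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--mode", choices=["C", "P", "T"], default="C")

# First pass: parse_known_args ignores unrecognized arguments,
# so --mode is readable before the bench options are registered.
mode = parser.parse_known_args()[0].mode

# Second pass: register options whose defaults depend on the mode.
parser.add_argument("--warm_up", type=int, default=100 if mode == "P" else 2)
parser.add_argument("--repeat", type=int, default=100 if mode == "P" else 4)

flags = parser.parse_args()
```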


def add_pattern_options(parser: argparse.ArgumentParser):
@@ -213,6 +226,45 @@ def add_pattern_options(parser: argparse.ArgumentParser):
pattern_name = parser.parse_known_args()[0].case
get_pattern_clz(pattern_name).add_args(parser)

def add_tuner_options(parser: argparse.ArgumentParser):
"""add options for the mode T"""
if parser.parse_known_args()[0].mode == "T":
parser.add_argument(
"--search_alg", type=str, choices=["grid", "ga"], default="grid"
)
parser.add_argument(
"--tuning_batch", type=int, default=Tuner.DEFAULT_BATCH_SIZE
)
parser.add_argument("--early_stop", type=int, default=Tuner.DEFAULT_EARLY_STOP)
parser.add_argument(
"--max_tuning_iters", type=int, default=Tuner.DEFAULT_MAX_ITERS
)
parser.add_argument("--timeout", type=int, default=Tuner.DEFAULT_TIMEOUT)
parser.add_argument(
"--space_percent", type=float, default=TuningSpace.DEFAULT_SPACE_PERCENT
)
parser.add_argument(
"--tuner_verbose",
action="store_true",
help="if we need print the tuner log",
)
parser.add_argument("--checkpoint_path", type=str, default="")

if parser.parse_known_args()[0].search_alg == "ga":
parser.add_argument(
"--ga_random_seed", type=int, default=GATuner.DEFAULT_RANDOM_SEED
)
parser.add_argument(
"--ga_elite_num", type=int, default=GATuner.DEFAULT_ELITE_NUM
)
parser.add_argument(
"--ga_mutation_prob", type=float, default=GATuner.DEFAULT_MUTATION_PROB
)
parser.add_argument(
"--ga_expected_tune_num",
type=int,
default=GATuner.DEFAULT_EXPECTED_TUNE_NUM,
)

def get_module_and_args(flags: argparse.Namespace):
args: List[Arg] = []
@@ -391,17 +443,77 @@ def performance_testing(flags: argparse.Namespace, module: ir.Module, args: List
print(json_res)


def performance_tuning(flags: argparse.Namespace, module: ir.Module, args: List[Arg]):
gc_args: List[torch.Tensor | int] = []
gc_tensors: Dict[str, torch.Tensor] = {}
for i, arg in enumerate(args):
tensor = fill_tensor(flags, arg, i)
gc_tensors["%arg" + str(i)] = tensor
if arg.scalar:
gc_args.append(tensor.data_ptr())
else:
gc_args.append(tensor)

mlir_args = get_mlir_args(gc_args)
with module.context as ctx, ir.Location.unknown():
if flags.ir_printing:
ctx.enable_multithreading(False)
batch_bench = (
batch_py_timeit_bench
if flags.bench_kind == "py"
else batch_mlir_wrapper_bench
)

def tuner_batch_bench(ir_modules):
return batch_bench(
ir_modules,
flags.entry,
"any(gc-cpu-pipeline)",
mlir_args,
flags.ir_printing,
flags.repeat,
flags.warm_up,
)

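# space_percent selects the fraction of the tuning space to explore and must lie in (0, 1]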
assert 0 < flags.space_percent <= 1.0
space = TuningSpace(module, flags.space_percent)
if flags.search_alg == "grid":
tuner = GridTuner(
tuner_batch_bench,
space,
flags.tuning_batch,
flags.early_stop,
flags.checkpoint_path,
flags.tuner_verbose,
)
else:
tuner = GATuner(
tuner_batch_bench,
space,
flags.tuning_batch,
flags.early_stop,
flags.checkpoint_path,
flags.tuner_verbose,
random_seed=flags.ga_random_seed,
expected_tune_num=flags.ga_expected_tune_num,
)
tuner.run(flags.max_tuning_iters, flags.timeout)


if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(prog="benchmark tool for graph compiler")
add_common_options(arg_parser)
add_bench_options(arg_parser)
add_pattern_options(arg_parser)
add_tuner_options(arg_parser)
flags = arg_parser.parse_args()
benchgc.util.set_seed(flags.seed)
ir_module, module_args = get_module_and_args(flags)
if flags.mode == "C":
correctness_testing(flags, ir_module, module_args)
elif flags.mode == "P":
performance_testing(flags, ir_module, module_args)
elif flags.mode == "T":
performance_tuning(flags, ir_module, module_args)
else:
pass
11 changes: 6 additions & 5 deletions test/benchgc/src/benchgc/bench.py
@@ -117,21 +117,22 @@ def batch_py_timeit_bench(
ir_modules: List[ir.Module],
entry_name: str,
pipeline: str,
mlir_args: list,
mlir_args: List[Any],
ir_printing=False,
repeat_time=5,
warm_up=2,
) -> List[Tuple[float, float]]:
"""benchmark a batch of mlir with python timeit."""
compiler = GraphCompiler(pipeline)
engines = []
funcs = []
compile_costs = []
for m in ir_modules:
compile_begin = timeit.default_timer()
engine = compiler.compile_and_jit(m, ir_printing=ir_printing)
engines.append(engine)
compile_cost = (timeit.default_timer() - compile_begin) * 1000
compile_costs.append(compile_cost)
funcs.append(engine.lookup(entry_name))

# Copied from execution_engine.py so that the cost of cast does not affect perf result.
packed_args = (ctypes.c_void_p * len(mlir_args))()
@@ -141,11 +142,11 @@
def run_bench(func, arg):
func(arg)

for func in funcs:
for func in [engine.lookup(entry_name) for engine in engines]:
timeit.timeit(lambda: run_bench(func, packed_args), number=warm_up)

execute_costs = []
for func in funcs:
for func in [engine.lookup(entry_name) for engine in engines]:
total_time = timeit.timeit(
lambda: run_bench(func, packed_args), number=repeat_time
)
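For orientation, a hedged usage sketch of this batch entry point; the candidate modules and `mlir_args` are placeholders assumed to be prepared as in `performance_tuning` above:

```python
# Hypothetical caller: time two candidate modules in one batch.
# module_a / module_b stand in for prepared gc_mlir ir.Module objects.
costs = batch_py_timeit_bench(
    [module_a, module_b],
    "entry",                    # entry function name (placeholder)
    "any(gc-cpu-pipeline)",
    mlir_args,
    repeat_time=100,
    warm_up=50,
)
for pair in costs:
    # each tuple pairs an execution cost with a compilation cost (ms)
    print(pair)
```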
@@ -158,7 +159,7 @@ def batch_mlir_wrapper_bench(
ir_modules: List[ir.Module],
entry_name: str,
pipeline: str,
mlir_args: list,
mlir_args: List[Any],
ir_printing=False,
repeat_time=5,
warm_up=2,
22 changes: 22 additions & 0 deletions test/benchgc/src/benchgc/tuner/CMakeLists.txt
@@ -0,0 +1,22 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################


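# Copy the tuner's Python sources into the build tree so the built
# benchgc package ships the tuner module alongside the other subpackages.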
file(GLOB PYTHON_SCRIPTS "*.py")
foreach(PY_SCRIPT ${PYTHON_SCRIPTS})
configure_file(${PY_SCRIPT} ${CMAKE_BINARY_DIR}/test/benchgc/src/benchgc/tuner/ COPYONLY)
endforeach()