diff --git a/codegen/tools/gen_all_oplist.py b/codegen/tools/gen_all_oplist.py index ec02ff7ec3c..79347ca0e5d 100644 --- a/codegen/tools/gen_all_oplist.py +++ b/codegen/tools/gen_all_oplist.py @@ -1,4 +1,3 @@ -#!/usr/bin/env fbpython # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -7,25 +6,63 @@ import argparse import os +import re import sys +from functools import reduce +from pathlib import Path from typing import Any, List -from tools_copy.code_analyzer import gen_oplist_copy_from_core +import yaml +from torchgen.selective_build.selector import ( + combine_selective_builders, + SelectiveBuilder, +) + + +def throw_if_any_op_includes_overloads(selective_builder: SelectiveBuilder) -> None: + ops = [] + for op_name, op in selective_builder.operators.items(): + if op.include_all_overloads: + ops.append(op_name) + if ops: + raise Exception( # noqa: TRY002 + ( + "Operators that include all overloads are " + + "not allowed since --allow-include-all-overloads " + + "was not specified: {}" + ).format(", ".join(ops)) + ) + + +def resolve_model_file_path_to_buck_target(model_file_path: str) -> str: + real_path = str(Path(model_file_path).resolve(strict=True)) + # try my best to convert to buck target + prog = re.compile( + r"/.*/buck-out/.*/(fbsource|fbcode)/[0-9a-f]*/(.*)/__(.*)_et_oplist__/out/selected_operators.yaml" + ) + match = prog.match(real_path) + if match: + return f"{match.group(1)}//{match.group(2)}:{match.group(3)}" + else: + return real_path def main(argv: List[Any]) -> None: - """This binary is a wrapper for //executorch/codegen/tools/gen_oplist_copy_from_core.py. - This is needed because we intend to error out for the case where `model_file_list_path` - is empty or invalid, so that the ExecuTorch build will fail when no selective build target - is provided as a dependency to ExecuTorch build. + """This binary generates 3 files: + + 1. selected_mobile_ops.h: Primary operators used by templated selective build and Kernel Function + dtypes captured by tracing + 2. selected_operators.yaml: Selected root and non-root operators (either via tracing or static analysis) """ parser = argparse.ArgumentParser(description="Generate operator lists") parser.add_argument( + "--output-dir", "--output_dir", help=("The directory to store the output yaml file (selected_operators.yaml)"), required=True, ) parser.add_argument( + "--model-file-list-path", "--model_file_list_path", help=( "Path to a file that contains the locations of individual " @@ -36,6 +73,7 @@ def main(argv: List[Any]) -> None: required=True, ) parser.add_argument( + "--allow-include-all-overloads", "--allow_include_all_overloads", help=( "Flag to allow operators that include all overloads. " @@ -46,26 +84,112 @@ def main(argv: List[Any]) -> None: default=False, required=False, ) + parser.add_argument( + "--check-ops-not-overlapping", + "--check_ops_not_overlapping", + help=( + "Flag to check if the operators in the model file list are overlapping. " + + "If not set, the script will not error out for overlapping operators." + ), + action="store_true", + default=False, + required=False, + ) + options = parser.parse_args(argv) - # check if the build has any dependency on any selective build target. If we have a target, BUCK shold give us either: + # Check if the build has any dependency on any selective build target. If we have a target, BUCK shold give us either: # 1. a yaml file containing selected ops (could be empty), or - # 2. a non-empty list of yaml files in the `model_file_list_path`. - # If none of the two things happened, the build target has no dependency on any selective build and we should error out. - options = parser.parse_args(argv) + # 2. a non-empty list of yaml files in the `model_file_list_path` or + # 3. a non-empty list of directories in the `model_file_list_path`, with each directory containing a `selected_operators.yaml` file. + # If none of the 3 things happened, the build target has no dependency on any selective build and we should error out. if os.path.isfile(options.model_file_list_path): - pass + print("Processing model file: ", options.model_file_list_path) + model_dicts = [] + model_dict = yaml.safe_load(open(options.model_file_list_path)) + model_dicts.append(model_dict) else: + print( + "Processing model file list or model directory list: ", + options.model_file_list_path, + ) assert ( options.model_file_list_path[0] == "@" ), "model_file_list_path is not a valid file path, or it doesn't start with '@'. This is likely a BUCK issue." + model_file_list_path = options.model_file_list_path[1:] + + model_dicts = [] with open(model_file_list_path) as model_list_file: model_file_names = model_list_file.read().split() assert ( len(model_file_names) > 0 ), "BUCK was not able to find any `et_operator_library` in the dependency graph of the current ExecuTorch " "build. Please refer to Selective Build wiki page to add at least one." - gen_oplist_copy_from_core.main(argv) + for model_file_name in model_file_names: + if not os.path.isfile(model_file_name): + model_file_name = os.path.join( + model_file_name, "selected_operators.yaml" + ) + print("Processing model file: ", model_file_name) + assert os.path.isfile( + model_file_name + ), f"{model_file_name} is not a valid file path. This is likely a BUCK issue." + with open(model_file_name, "rb") as model_file: + model_dict = yaml.safe_load(model_file) + resolved = resolve_model_file_path_to_buck_target(model_file_name) + for op in model_dict["operators"]: + model_dict["operators"][op]["debug_info"] = [resolved] + model_dicts.append(model_dict) + + selective_builders = [SelectiveBuilder.from_yaml_dict(m) for m in model_dicts] + + # Optionally check if the operators in the model file list are overlapping. + if options.check_ops_not_overlapping: + ops = {} + for model_dict in model_dicts: + for op_name in model_dict["operators"]: + if op_name in ops: + debug_info_1 = ",".join(ops[op_name]["debug_info"]) + debug_info_2 = ",".join( + model_dict["operators"][op_name]["debug_info"] + ) + error = f"Operator {op_name} is used in 2 models: {debug_info_1} and {debug_info_2}" + if "//" not in debug_info_1 and "//" not in debug_info_2: + error += "\nWe can't determine what BUCK targets these model files belong to." + tail = "." + else: + error += "\nPlease run the following commands to find out where is the BUCK target being added as a dependency to your target:\n" + error += f'\n buck2 cquery "allpaths(, {debug_info_1})"' + error += f'\n buck2 cquery "allpaths(, {debug_info_2})"' + tail = "as well as results from BUCK commands listed above." + + error += ( + "\n\nIf issue is not resolved, please post in PyTorch Edge Q&A with this error message" + + tail + ) + raise Exception(error) # noqa: TRY002 + ops[op_name] = model_dict["operators"][op_name] + # We may have 0 selective builders since there may not be any viable + # pt_operator_library rule marked as a dep for the pt_operator_registry rule. + # This is potentially an error, and we should probably raise an assertion + # failure here. However, this needs to be investigated further. + selective_builder = SelectiveBuilder.from_yaml_dict({}) + if len(selective_builders) > 0: + selective_builder = reduce( + combine_selective_builders, + selective_builders, + ) + + if not options.allow_include_all_overloads: + throw_if_any_op_includes_overloads(selective_builder) + with open( + os.path.join(options.output_dir, "selected_operators.yaml"), "wb" + ) as out_file: + out_file.write( + yaml.safe_dump( + selective_builder.to_dict(), default_flow_style=False + ).encode("utf-8"), + ) if __name__ == "__main__": diff --git a/codegen/tools/gen_oplist.py b/codegen/tools/gen_oplist.py index fbb191a6a81..ef451f0cd8b 100644 --- a/codegen/tools/gen_oplist.py +++ b/codegen/tools/gen_oplist.py @@ -249,7 +249,7 @@ def gen_oplist( _dump_yaml( sorted(op_set), output_path, - os.path.basename(source_name) if source_name else None, + source_name, et_kernel_metadata, include_all_operators, ) diff --git a/codegen/tools/gen_oplist_copy_from_core.py b/codegen/tools/gen_oplist_copy_from_core.py deleted file mode 100644 index 34a8af245bb..00000000000 --- a/codegen/tools/gen_oplist_copy_from_core.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# This is a simplified copy from //xplat/caffe2/tools/code_analyzer/gen_oplist.py -import argparse -import os -import sys -from functools import reduce -from typing import Any, List - -import yaml -from torchgen.selective_build.selector import ( - combine_selective_builders, - SelectiveBuilder, -) - - -def throw_if_any_op_includes_overloads(selective_builder: SelectiveBuilder) -> None: - ops = [] - for op_name, op in selective_builder.operators.items(): - if op.include_all_overloads: - ops.append(op_name) - if ops: - raise Exception( # noqa: TRY002 - ( - "Operators that include all overloads are " - + "not allowed since --allow-include-all-overloads " - + "was specified: {}" - ).format(", ".join(ops)) - ) - - -def main(argv: List[Any]) -> None: - """This binary generates 3 files: - - 1. selected_mobile_ops.h: Primary operators used by templated selective build and Kernel Function - dtypes captured by tracing - 2. selected_operators.yaml: Selected root and non-root operators (either via tracing or static analysis) - """ - parser = argparse.ArgumentParser(description="Generate operator lists") - parser.add_argument( - "--output-dir", - "--output_dir", - help=( - "The directory to store the output yaml files (selected_mobile_ops.h, " - + "selected_kernel_dtypes.h, selected_operators.yaml)" - ), - required=True, - ) - parser.add_argument( - "--model-file-list-path", - "--model_file_list_path", - help=( - "Path to a file that contains the locations of individual " - + "model YAML files that contain the set of used operators. This " - + "file path must have a leading @-symbol, which will be stripped " - + "out before processing." - ), - required=True, - ) - parser.add_argument( - "--allow-include-all-overloads", - "--allow_include_all_overloads", - help=( - "Flag to allow operators that include all overloads. " - + "If not set, operators registered without using the traced style will" - + "break the build." - ), - action="store_true", - default=False, - required=False, - ) - options = parser.parse_args(argv) - - if os.path.isfile(options.model_file_list_path): - print("Processing model file: ", options.model_file_list_path) - model_dicts = [] - model_dict = yaml.safe_load(open(options.model_file_list_path)) - model_dicts.append(model_dict) - else: - print("Processing model directory: ", options.model_file_list_path) - assert options.model_file_list_path[0] == "@" - model_file_list_path = options.model_file_list_path[1:] - - model_dicts = [] - with open(model_file_list_path) as model_list_file: - model_file_names = model_list_file.read().split() - for model_file_name in model_file_names: - with open(model_file_name, "rb") as model_file: - model_dict = yaml.safe_load(model_file) - model_dicts.append(model_dict) - - selective_builders = [SelectiveBuilder.from_yaml_dict(m) for m in model_dicts] - - # We may have 0 selective builders since there may not be any viable - # pt_operator_library rule marked as a dep for the pt_operator_registry rule. - # This is potentially an error, and we should probably raise an assertion - # failure here. However, this needs to be investigated further. - selective_builder = SelectiveBuilder.from_yaml_dict({}) - if len(selective_builders) > 0: - selective_builder = reduce( - combine_selective_builders, - selective_builders, - ) - - if not options.allow_include_all_overloads: - throw_if_any_op_includes_overloads(selective_builder) - with open( - os.path.join(options.output_dir, "selected_operators.yaml"), "wb" - ) as out_file: - out_file.write( - yaml.safe_dump( - selective_builder.to_dict(), default_flow_style=False - ).encode("utf-8"), - ) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/codegen/tools/targets.bzl b/codegen/tools/targets.bzl index 4d3843bfb75..62037b92300 100644 --- a/codegen/tools/targets.bzl +++ b/codegen/tools/targets.bzl @@ -78,15 +78,6 @@ def define_common_targets(is_fbcode = False): ], ) - runtime.python_library( - name = "gen_oplist_copy_from_core", - srcs = [ - "gen_oplist_copy_from_core.py", - ], - base_module = "tools_copy.code_analyzer", - external_deps = ["torchgen"], - ) - runtime.python_library( name = "gen_all_oplist_lib", srcs = ["gen_all_oplist.py"], @@ -94,7 +85,7 @@ def define_common_targets(is_fbcode = False): visibility = [ "//executorch/...", ], - deps = [":gen_oplist_copy_from_core"], + external_deps = ["torchgen"], ) runtime.python_binary( @@ -130,7 +121,7 @@ def define_common_targets(is_fbcode = False): srcs = ["gen_selected_op_variants.py"], base_module = "executorch.codegen.tools", visibility = ["//executorch/..."], - deps = [":gen_oplist_copy_from_core"], + deps = [":gen_all_oplist_lib"], ) runtime.python_binary( diff --git a/codegen/tools/test/test_gen_all_oplist.py b/codegen/tools/test/test_gen_all_oplist.py index 9eda126de4e..2d25af24833 100644 --- a/codegen/tools/test/test_gen_all_oplist.py +++ b/codegen/tools/test/test_gen_all_oplist.py @@ -1,58 +1,121 @@ -#!/usr/bin/env fbpython # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import os import tempfile import unittest -from unittest.mock import NonCallableMock, patch import executorch.codegen.tools.gen_all_oplist as gen_all_oplist +import yaml class TestGenAllOplist(unittest.TestCase): def setUp(self) -> None: self.temp_dir = tempfile.TemporaryDirectory() - self.temp_file = tempfile.NamedTemporaryFile(dir=self.temp_dir.name) + self.test1_yaml = os.path.join(self.temp_dir.name, "test1.yaml") + with open(self.test1_yaml, "w") as f: + f.write( + """ +build_features: [] +custom_classes: [] +et_kernel_metadata: + aten::_cdist_forward.out: + - default + aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out: + - default +operators: + aten::_cdist_forward.out: + debug_info: + - test1.yaml + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out: + debug_info: + - test1.yaml + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + """ + ) + self.test2_yaml = os.path.join(self.temp_dir.name, "test2.yaml") + with open(self.test2_yaml, "w") as f: + f.write( + """ +build_features: [] +custom_classes: [] +et_kernel_metadata: + aten::_cdist_forward.out: + - default + aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out: + - default +operators: + aten::_cdist_forward.out: + debug_info: + - test2.yaml + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out: + debug_info: + - test2.yaml + include_all_overloads: false + is_root_operator: true + is_used_for_training: true + """ + ) - @patch("tools_copy.code_analyzer.gen_oplist_copy_from_core.main") - def test_model_file_list_path_is_file_pass( - self, mock_main: NonCallableMock - ) -> None: + def test_gen_all_oplist_with_1_valid_yaml(self) -> None: args = [ f"--output_dir={self.temp_dir.name}", - f"--model_file_list_path={self.temp_file.name}", + f"--model_file_list_path={self.test1_yaml}", ] gen_all_oplist.main(args) - mock_main.assert_called_once_with(args) + self.assertTrue( + os.path.exists(os.path.join(self.temp_dir.name, "selected_operators.yaml")) + ) + with open(os.path.join(self.temp_dir.name, "selected_operators.yaml")) as f: + es = yaml.safe_load(f) + debug_info = es["operators"]["aten::_cdist_forward.out"]["debug_info"] + self.assertEqual(len(debug_info), 1) + self.assertTrue("test1.yaml" in debug_info) - @patch("tools_copy.code_analyzer.gen_oplist_copy_from_core.main") - def test_model_file_list_path_is_directory_with_file_pass( - self, mock_main: NonCallableMock + def test_gen_all_oplist_with_2_conflicting_yaml_no_check( + self, ) -> None: file_ = tempfile.NamedTemporaryFile() with open(file_.name, "w") as f: - f.write(self.temp_file.name) + f.write(f"{self.test1_yaml}\n{self.test2_yaml}") args = [ - f"--model_file_list_path=@{file_.name}", f"--output_dir={self.temp_dir.name}", + f"--model_file_list_path=@{file_.name}", ] gen_all_oplist.main(args) - mock_main.assert_called_once_with(args) - file_.close() + self.assertTrue( + os.path.exists(os.path.join(self.temp_dir.name, "selected_operators.yaml")) + ) + with open(os.path.join(self.temp_dir.name, "selected_operators.yaml")) as f: + es = yaml.safe_load(f) + debug_info = es["operators"]["aten::_cdist_forward.out"]["debug_info"] + self.assertEqual(len(debug_info), 2) + self.assertTrue(self.test1_yaml in debug_info) + self.assertTrue(self.test2_yaml in debug_info) - @patch("tools_copy.code_analyzer.gen_oplist_copy_from_core.main") - def test_model_file_list_path_is_empty_directory_throws( - self, mock_main: NonCallableMock + def test_gen_all_oplist_with_2_conflicting_yaml_raises( + self, ) -> None: file_ = tempfile.NamedTemporaryFile() + with open(file_.name, "w") as f: + f.write(f"{self.test1_yaml}\n{self.test2_yaml}") args = [ - f"--model_file_list_path=@{file_.name}", f"--output_dir={self.temp_dir.name}", + f"--model_file_list_path=@{file_.name}", + "--check_ops_not_overlapping", ] - with self.assertRaises(AssertionError): + with self.assertRaisesRegex(Exception, "Operator .* is used in 2 models"): gen_all_oplist.main(args) def tearDown(self): diff --git a/codegen/tools/test/test_gen_oplist.py b/codegen/tools/test/test_gen_oplist.py index bd1d0082489..c47ac252f9c 100644 --- a/codegen/tools/test/test_gen_oplist.py +++ b/codegen/tools/test/test_gen_oplist.py @@ -119,6 +119,7 @@ def test_gen_op_list_with_both_op_list_and_ops_schema_yaml_merges( mock_get_operators: NonCallableMock, ) -> None: output_path = os.path.join(self.temp_dir.name, "output.yaml") + test_path = os.path.join(self.temp_dir.name, "test.yaml") args = [ f"--output_path={output_path}", "--root_ops=aten::relu.out", @@ -128,7 +129,7 @@ def test_gen_op_list_with_both_op_list_and_ops_schema_yaml_merges( mock_dump_yaml.assert_called_once_with( ["aten::add.out", "aten::mul.out", "aten::relu.out"], output_path, - "test.yaml", + test_path, { "aten::relu.out": ["default"], "aten::add.out": ["default"], diff --git a/shim/xplat/executorch/codegen/codegen.bzl b/shim/xplat/executorch/codegen/codegen.bzl index d989e33591f..3b6a0dd247a 100644 --- a/shim/xplat/executorch/codegen/codegen.bzl +++ b/shim/xplat/executorch/codegen/codegen.bzl @@ -531,12 +531,12 @@ def executorch_generated_lib( ) # genrule for selective build from static operator list - oplist_dir_name = name + "_pt_oplist" + oplist_dir_name = name + "_et_oplist" runtime.genrule( name = oplist_dir_name, macros_only = False, cmd = ("$(exe fbsource//xplat/executorch/codegen/tools:gen_all_oplist) " + - "--model_file_list_path $(@query_outputs 'attrfilter(labels, et_operator_library, deps(set({deps})))') " + + "--model_file_list_path $(@query_outputs \'attrfilter(labels, et_operator_library, deps(set({deps})))\') " + "--allow_include_all_overloads " + "--output_dir $OUT ").format(deps = " ".join(["\"{}\"".format(d) for d in deps])), outs = {"selected_operators.yaml": ["selected_operators.yaml"]}, @@ -665,3 +665,31 @@ def executorch_generated_lib( define_static_target = define_static_targets, platforms = platforms, ) + +# Util macro that takes in a binary or a shared library, find targets ending with `_et_oplist` in the transitive closure of deps, +# get the `selected_operators.yaml` from those targets, try to merge them into a single yaml. This target will fail to build, if +# there are intersections of all `selected_operators.yaml` the `target` is depending on. +# +# An example failure case: a binary `bin` is depending on 2 `executorch_generated_lib`s and they both register `aten::add.out` +# with either the same or different kernels associated to it. +# +# If build successfully, all of the `selected_operators.yaml` will be merged into 1 `selected_operators.yaml` for debugging purpose. +def executorch_ops_check( + name, + deps, + **kwargs, +): + runtime.genrule( + name = name, + macros_only = False, + cmd = ("$(exe fbsource//xplat/executorch/codegen/tools:gen_all_oplist) " + + "--model_file_list_path $(@query_outputs \"filter('.*_et_oplist', deps(set({deps})))\") " + + "--allow_include_all_overloads " + + "--check_ops_not_overlapping " + + "--output_dir $OUT ").format(deps = " ".join(["\'{}\'".format(d) for d in deps])), + define_static_target = False, + platforms = kwargs.pop("platforms", get_default_executorch_platforms()), + outs = {"selected_operators.yaml": ["selected_operators.yaml"]}, + default_outs = ["."], + **kwargs, + )