diff --git a/mlir-tensorrt/.bazelrc b/mlir-tensorrt/.bazelrc new file mode 100644 index 000000000..73178d8a2 --- /dev/null +++ b/mlir-tensorrt/.bazelrc @@ -0,0 +1,68 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. + +# TODO: migrate all dependencies from WORKSPACE to MODULE.bazel +# https://bazel.build/external/migration +common --noenable_bzlmod + +# Automatically detect host platform to pick config +common --enable_platform_specific_config + +############################################################################### +# Options for "generic_clang" builds: these options should generally apply to +# builds using a Clang-based compiler, and default to the `clang` executable on +# the `PATH`. While these are provided for convenience and may serve as a +# reference, it would be preferable for users to configure an explicit C++ +# toolchain instead of relying on `.bazelrc` files. +############################################################################### + +# Set the default compiler to the `clang` binary on the `PATH`. +build:generic_clang --repo_env=CC=clang + +# C++17 standard version is required. +build:generic_clang --cxxopt=-std=c++17 --host_cxxopt=-std=c++17 + +# Use `-Wall` for Clang. +build:generic_clang --copt=-Wall --host_copt=-Wall + +# The Clang available on MacOS has a warning that isn't clean on MLIR code. The +# warning doesn't show up with more recent Clangs, so just disable for now. +build:generic_clang --cxxopt=-Wno-range-loop-analysis --host_cxxopt=-Wno-range-loop-analysis + +# Build errors are not a helpful way to enforce deprecation in-repo and it is +# not the point of the Bazel build to catch usage of deprecated APIs. +build:generic_clang --copt=-Wno-deprecated --host_copt=-Wno-deprecated + + +# lld links faster than other linkers. 
Assume that anybody using clang on linux +# also has lld available. +build:linux --linkopt=-fuse-ld=lld --host_linkopt=-fuse-ld=lld +build:linux --config=generic_clang + +build:macos --config=generic_clang + +# Other compilation modes +build:opt --compilation_mode=opt +build:dbg --compilation_mode=dbg + +# GDB builds in dbg mode +build:gdb --config=dbg + + +# Continue running tests for targets that build without having +# AOT compile genrules failing to build from terminating the rest +test --keep_going + + +# rules_cuda convenience flags +build --flag_alias=enable_cuda=@rules_cuda//cuda:enable +build --flag_alias=cuda_archs=@rules_cuda//cuda:archs +build --flag_alias=cuda_compiler=@rules_cuda//cuda:compiler +build --flag_alias=cuda_copts=@rules_cuda//cuda:copts +build --flag_alias=cuda_host_copts=@rules_cuda//cuda:host_copts +build --flag_alias=cuda_runtime=@rules_cuda//cuda:runtime + +build:generic_clang --enable_cuda=True +build:generic_clang --cuda_compiler=clang diff --git a/mlir-tensorrt/BUILD b/mlir-tensorrt/BUILD new file mode 100644 index 000000000..2159a5470 --- /dev/null +++ b/mlir-tensorrt/BUILD @@ -0,0 +1,665 @@ +load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library", "td_library") +load("@rules_python//python:pip.bzl", "compile_pip_requirements") + +package( + default_visibility = [ + "//visibility:public", + ], +) + +td_library( + name = "TensorRTDialectTdFiles", + srcs = [ + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttributes.td", + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.td", + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTEnums.td", + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTInterfaces.td", + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOps.td", + ], + includes = ["tensorrt/include"], + deps = [ + "@llvm-project//mlir:AttrTdFiles", + "@llvm-project//mlir:ControlFlowInterfacesTdFiles", + "@llvm-project//mlir:DestinationStyleOpInterfaceTdFiles", + 
"@llvm-project//mlir:InferTypeOpInterfaceTdFiles", + "@llvm-project//mlir:LoopLikeInterfaceTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", + "@llvm-project//mlir:SideEffectInterfacesTdFiles", + ], +) + +td_library( + name = "TensorKindInterfaceTdFiles", + srcs = [ + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.td", + ], + includes = ["tensorrt/include"], + deps = [ + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTOpsIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-op-decls"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOps.h.inc", + ), + ( + ["-gen-op-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOps.cpp.inc", + ), + ( + [ + "-gen-typedef-decls", + "-dialect=tensorrt", + ], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOpsTypes.h.inc", + ), + ( + [ + "-gen-typedef-defs", + "-dialect=tensorrt", + ], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOpsTypes.cpp.inc", + ), + ( + [ + "-gen-dialect-decls", + "-dialect=tensorrt", + ], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOpsDialect.h.inc", + ), + ( + [ + "-gen-dialect-defs", + "-dialect=tensorrt", + ], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOpsDialect.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOps.td", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:CallInterfacesTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTEnumsIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-enum-decls"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTEnums.h.inc", + ), + ( + ["-gen-enum-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTEnums.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = 
"tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTEnums.td", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:AttrTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTAttributesIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-attrdef-decls"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttributes.h.inc", + ), + ( + ["-gen-attrdef-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttributes.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttributes.td", + deps = [ + ":TensorRTDialectTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTInterfacesIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-op-interface-decls"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTInterfaces.h.inc", + ), + ( + ["-gen-op-interface-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTInterfaces.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTInterfaces.td", + deps = [ + ":TensorRTDialectTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTAttrInterfacesIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-attr-interface-decls"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttrInterfaces.h.inc", + ), + ( + ["-gen-attr-interface-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTAttrInterfaces.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTInterfaces.td", + deps = [ + ":TensorRTDialectTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTTensorKindInterfacesIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + 
["-gen-op-interface-decls"], + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.h.inc", + ), + ( + ["-gen-op-interface-defs"], + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.cpp.inc", + ), + ( + ["-gen-attr-interface-decls"], + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindAttrInterface.h.inc", + ), + ( + ["-gen-attr-interface-defs"], + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindAttrInterface.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.td", + deps = [ + ":TensorKindInterfaceTdFiles", + ], +) + +cc_library( + name = "TensorRTCommonUtils", + srcs = [ + "tensorrt/lib/Utils/ConstantFoldUtils.cpp", + "tensorrt/lib/Utils/NvInferPluginUtils.cpp", + "tensorrt/lib/Utils/ShapeUtils.cpp", + "tensorrt/lib/Utils/StaticValueUtils.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Utils/ConstantFoldUtils.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/NvInferAdaptor.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/NvInferPluginUtils.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/Options.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/ShapeUtils.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/StaticValueUtils.h", + "tensorrt/include/mlir-tensorrt-dialect/Utils/TensorRTVersion.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgDialect", + "@llvm-project//mlir:TensorDialect", + "@llvm-project//mlir:TransformUtils", + "@rules_cuda//cuda:runtime", + "@tensorrt10_x86//:tensorrt10", + ], +) + +cc_library( + name = "TensorRTDialect", + srcs = [ + "tensorrt/lib/TensorRT/IR/EinsumHelper.cpp", + "tensorrt/lib/TensorRT/IR/EinsumHelper.h", + "tensorrt/lib/TensorRT/IR/TensorKindOpInterfaceImpl.cpp", + "tensorrt/lib/TensorRT/IR/TensorRT.cpp", + "tensorrt/lib/TensorRT/IR/TypeInferenceInterfaceImpls.cpp", + 
"tensorrt/lib/TensorRT/IR/Verification.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.h", + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTDialect.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTAttrInterfacesIncGen", + ":TensorRTAttributesIncGen", + ":TensorRTEnumsIncGen", + ":TensorRTInterfacesIncGen", + ":TensorRTOpsIncGen", + ":TensorRTTensorKindInterfacesIncGen", + ":TensorRTCommonUtils", + "@llvm-project//mlir:FuncDialect", + "@llvm-project//mlir:QuantOps", + ], +) + +cc_library( + name = "TensorRTUtils", + srcs = [ + "tensorrt/lib/TensorRT/Utils/Utils.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Utils/Utils.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTDialect", + ], +) + +cc_library( + name = "TensorRTTensorKindInterfaces", + srcs = [ + "tensorrt/lib/Interface/TensorKindOpInterface.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Interface/TensorKindOpInterface.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTTensorKindInterfacesIncGen", + "@llvm-project//mlir:Analysis", + ], +) + +cc_library( + name = "TensorRTAnalysis", + srcs = [ + "tensorrt/lib/Analysis/TensorKindAnalysis.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Analysis/TensorKindAnalysis.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTTensorKindInterfaces", + "@llvm-project//mlir:BufferizationDialect", + ], +) + +gentbl_cc_library( + name = "TensorRTTransformsPassesIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=TensorRT", + ], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Transforms/Passes.td", + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + 
+gentbl_cc_library( + name = "TensorRTTransformsActivationsPdllGen", + strip_include_prefix = "tensorrt/lib/TensorRT/Transforms", + tbl_outs = [ + ( + ["-x=cpp"], + "tensorrt/lib/TensorRT/Transforms/RaiseActivations.pdll.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-pdll", + td_file = "tensorrt/lib/TensorRT/Transforms/RaiseActivations.pdll", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:CallInterfacesTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTTransformsNormalizationsPdllGen", + strip_include_prefix = "tensorrt/lib/TensorRT/Transforms", + tbl_outs = [ + ( + ["-x=cpp"], + "tensorrt/lib/TensorRT/Transforms/RaiseNormalizations.pdll.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-pdll", + td_file = "tensorrt/lib/TensorRT/Transforms/RaiseNormalizations.pdll", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:CallInterfacesTdFiles", + ], +) + +cc_library( + name = "TensorRTTransforms", + srcs = [ + "tensorrt/lib/TensorRT/Transforms/ApplyBugWorkarounds.cpp", + "tensorrt/lib/TensorRT/Transforms/BroadcastElimination.cpp", + "tensorrt/lib/TensorRT/Transforms/ExpandOps.cpp", + "tensorrt/lib/TensorRT/Transforms/InferPluginShapes.cpp", + "tensorrt/lib/TensorRT/Transforms/LegalizeInt8.cpp", + "tensorrt/lib/TensorRT/Transforms/Passes.cpp", + "tensorrt/lib/TensorRT/Transforms/RaiseActivations.cpp", + "tensorrt/lib/TensorRT/Transforms/RaiseNormalizations.cpp", + "tensorrt/lib/TensorRT/Transforms/ReshapeElimination.cpp", + "tensorrt/lib/TensorRT/Transforms/TransposeElimination.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTDialect", + ":TensorRTTransformsActivationsPdllGen", + ":TensorRTTransformsNormalizationsPdllGen", + ":TensorRTTransformsPassesIncGen", + ], +) + +gentbl_cc_library( + name = "TensorRTGenericTransformPassesIncGen", + strip_include_prefix = "compiler/include", + tbl_outs = [ + ( + [ + 
"-gen-pass-decls", + "-name=MLIRTensorRTGenericTransforms", + ], + "compiler/include/mlir-tensorrt/Transforms/Passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "compiler/include/mlir-tensorrt/Transforms/Passes.td", + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +cc_library( + name = "TensorRTGeneralTransforms", + srcs = [ + "compiler/lib/Transforms/DropNestedModules/DropNestedModules.cpp", + "compiler/lib/Transforms/DuplicateFunctionElimination/DuplicateFunctionElimination.cpp", + "compiler/lib/Transforms/MemRefCastElimination/MemRefCastElimination.cpp", + "compiler/lib/Transforms/SCFDetensorizeLoops/SCFDetensorizeLoops.cpp", + ], + hdrs = [ + "compiler/include/mlir-tensorrt/Transforms/Passes.h", + "compiler/include/mlir-tensorrt/Transforms/Transforms.h", + ], + strip_include_prefix = "compiler/include", + deps = [ + ":TensorRTAnalysis", + ":TensorRTGenericTransformPassesIncGen", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", + ], +) + +gentbl_cc_library( + name = "TensorRTEnumConverterGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["--gen-tensorrt-enum-converter-defs"], + "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/EnumConverters.inc.cpp", + ), + ], + tblgen = ":mlir-tensorrt-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTEnums.td", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:AttrTdFiles", + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +gentbl_cc_library( + name = "TensorRTEncodingOpInterfaceIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["-gen-op-interface-decls"], + "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/TensorRTEncodingOpInterface.h.inc", + ), + ( + ["-gen-op-interface-defs"], + "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/TensorRTEncodingOpInterface.cpp.inc", + ), + ], + tblgen = 
"@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/TensorRTEncodingOpInterface.td", + deps = [ + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +cc_library( + name = "TensorRTEncodingOpInterface", + srcs = [ + "tensorrt/lib/Target/TensorRTEncodingOpInterface/NetworkEncoder.cpp", + "tensorrt/lib/Target/TensorRTEncodingOpInterface/TensorRTEncodingOpInterface.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/NetworkEncoder.h", + "tensorrt/include/mlir-tensorrt-dialect/Target/TensorRTEncodingOpInterface/TensorRTEncodingOpInterface.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTDialect", + ":TensorRTEncodingOpInterfaceIncGen", + ":TensorRTEnumConverterGen", + ":TensorRTTensorKindInterfaces", + ":TensorRTUtils", + ], +) + +gentbl_cc_library( + name = "TensorRTEncodingIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + ["--gen-tensorrt-layer-add-defs"], + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Target/TensorRTEncodingImpl.inc.cpp", + ), + ], + tblgen = ":mlir-tensorrt-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/TensorRT/IR/TensorRTOps.td", + deps = [ + ":TensorRTDialectTdFiles", + "@llvm-project//mlir:CallInterfacesTdFiles", + ], +) + +cc_library( + name = "TensorRTEncodingImpl", + srcs = [ + "tensorrt/lib/TensorRT/Target/TensorRTEncodingImpl.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/TensorRT/Target/TensorRTEncodingImpl.h", + ], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTEncodingIncGen", + ":TensorRTEncodingOpInterface", + ], +) + +cc_library( + name = "TensorRTDynamicLoader", + srcs = [ + "executor/lib/Utils/TensorRTDynamicLoader/TensorRTDynamicLoader.cpp", + ], + deps = [ + "@llvm-project//mlir:Support", + "@rules_cuda//cuda:runtime", + "@tensorrt10_x86//:tensorrt10", + ], +) + +gentbl_cc_library( + name = 
"TensorRTTranslationPassIncGen", + strip_include_prefix = "tensorrt/include", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=TensorRTTranslation", + ], + "tensorrt/include/mlir-tensorrt-dialect/Target/Passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tensorrt/include/mlir-tensorrt-dialect/Target/Passes.td", + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +cc_library( + name = "TensorRTTarget", + srcs = [ + "tensorrt/lib/Target/Registration.cpp", + "tensorrt/lib/Target/TranslateToTensorRT.cpp", + ], + hdrs = [ + "tensorrt/include/mlir-tensorrt-dialect/Target/Passes.h", + "tensorrt/include/mlir-tensorrt-dialect/Target/TranslateToTensorRT.h", + ], + copts = ["-DMLIR_TRT_TARGET_TENSORRT"], + strip_include_prefix = "tensorrt/include", + deps = [ + ":TensorRTEncodingImpl", + ":TensorRTEncodingOpInterface", + ":TensorRTTranslationPassIncGen", + "@llvm-project//llvm:BitWriter", + "@llvm-project//mlir:TranslateLib", + ], +) + +gentbl_cc_library( + name = "TensorRTConversionPassIncGen", + strip_include_prefix = "compiler/include", + tbl_outs = [ + ( + [ + "-gen-pass-decls", + "-name=MLIRTensorRTConversion", + ], + "compiler/include/mlir-tensorrt/Conversion/Passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "compiler/include/mlir-tensorrt/Conversion/Passes.td", + deps = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +cc_library( + name = "TensorRTConversion", + srcs = [ + "compiler/lib/Conversion/TensorRTCommon/ConvertToTensorRTCommon.cpp", + ], + hdrs = [ + "compiler/include/mlir-tensorrt/Conversion/Passes.h", + "compiler/include/mlir-tensorrt/Conversion/Patterns.h", + "compiler/include/mlir-tensorrt/Conversion/TensorRTCommon/ConvertToTensorRTCommon.h", + ], + strip_include_prefix = "compiler/include", + deps = [ + ":TensorRTConversionPassIncGen", + ":TensorRTDialect", + "@llvm-project//mlir:ConversionPasses", + ], +) + +cc_library( + name = "TensorRTRegistration", + srcs = [ + 
"compiler/lib/Registration/Registration.cpp", + ], + hdrs = [ + "compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtCoreDialects.h", + "compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h", + "compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtPasses.h", + "compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtTranslations.h", + ], + copts = ["-DMLIR_TRT_TARGET_TENSORRT"], + strip_include_prefix = "compiler/include", + deps = [ + ":TensorRTConversion", + ":TensorRTGeneralTransforms", + ":TensorRTTarget", + ":TensorRTTransforms", + "@llvm-project//mlir:FuncExtensions", + "@llvm-project//mlir:TransformDialectTransforms", + + ], +) + +cc_binary( + name = "mlir-tensorrt-tblgen", + srcs = ["tools/MlirTensorRtTblgen.cpp"], + deps = [ + "@llvm-project//mlir:MlirTableGenMain", + ], +) + +cc_binary( + name = "tensorrt-opt", + srcs = ["tensorrt/tensorrt-opt/tensorrt-opt.cpp"], + copts = ["-DMLIR_TRT_TARGET_TENSORRT"], + linkopts = ["-Wl,-rpath,/opt/src/mlir-tensorrt/bazel-mlir-tensorrt/external/tensorrt10_x86/targets/x86_64-linux-gnu/lib"], + deps = [ + ":TensorRTRegistration", + "@llvm-project//mlir:MlirOptLib", + ], +) + +cc_binary( + name = "mlir-tensorrt-opt", + srcs = ["tools/MlirTensorRtOpt.cpp"], + copts = ["-DMLIR_TRT_TARGET_TENSORRT"], + linkopts = ["-Wl,-rpath,/opt/src/mlir-tensorrt/bazel-mlir-tensorrt/external/tensorrt10_x86/targets/x86_64-linux-gnu/lib"], + deps = [ + ":TensorRTRegistration", + "@llvm-project//mlir:MlirOptLib", + ], +) + +load("@com_github_bazelbuild_buildtools//buildifier:def.bzl", "buildifier") + +buildifier( + name = "buildifier", +) + +# bazel run //:requirements.update +compile_pip_requirements( + name = "requirements", + requirements_in = "//:requirements.txt", + requirements_txt = "//:requirements_lock.txt", + tags = ["manual"], +) diff --git a/mlir-tensorrt/WORKSPACE b/mlir-tensorrt/WORKSPACE new file mode 100644 index 000000000..effbbd3a5 --- /dev/null +++ 
b/mlir-tensorrt/WORKSPACE @@ -0,0 +1,81 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. + +workspace(name = "tensorrt-mlir") + +load("//:deps.bzl", "third_party_deps") + +third_party_deps() + +load("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure") + +llvm_configure( + name = "llvm-project", + targets = [ + "X86", + "NVPTX", + "AArch64", + ], +) + +# -------------------------- # +# CUDA Rules Setup # +# -------------------------- # + +load("@rules_cuda//cuda:repositories.bzl", "register_detected_cuda_toolchains", "rules_cuda_dependencies") + +rules_cuda_dependencies() + +register_detected_cuda_toolchains() + +# -------------------------- # +# Hermetic Python Setup # +# -------------------------- # + +load("@rules_python//python:repositories.bzl", "py_repositories") + +py_repositories() + +load("@rules_python//python:repositories.bzl", "python_register_toolchains") + +python_register_toolchains( + name = "python_3_10", + python_version = "3.10", +) + +load("@python_3_10//:defs.bzl", "interpreter") +load("@rules_python//python:pip.bzl", "pip_parse") + +pip_parse( + name = "pip_deps", + python_interpreter_target = interpreter, + requirements_lock = "//:requirements_lock.txt", +) + +load("@pip_deps//:requirements.bzl", "install_deps") + +install_deps() + +# --------------------------- # +# Buildifier Dependencies # +# --------------------------- # + +# https://github.com/bazelbuild/buildtools/blob/master/buildifier/README.md + +load("@io_bazel_rules_go//go:deps.bzl", "go_rules_dependencies") + +go_rules_dependencies() + +load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains") + +go_register_toolchains(version = "1.20.3") + +load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") + +gazelle_dependencies() + +load("@com_google_protobuf//:protobuf_deps.bzl", 
"protobuf_deps") + +protobuf_deps() diff --git a/mlir-tensorrt/build_tools/bazel/Dockerfile b/mlir-tensorrt/build_tools/bazel/Dockerfile new file mode 100644 index 000000000..01cd872d3 --- /dev/null +++ b/mlir-tensorrt/build_tools/bazel/Dockerfile @@ -0,0 +1,84 @@ +ARG BASE_IMAGE=nvcr.io/nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 +FROM ${BASE_IMAGE} + +# Specify user IDs +ARG GROUP +ARG GID +ARG USER +ARG UID + +# Run below commands as root +USER root + +############################# +# Basic Dependencies +############################# + +ARG DEBIAN_FRONTEND=noninteractive + +RUN <> /etc/sudoers + +# Switch to user +USER ${USER} \ No newline at end of file diff --git a/mlir-tensorrt/build_tools/bazel/run_docker.sh b/mlir-tensorrt/build_tools/bazel/run_docker.sh new file mode 100755 index 000000000..d09cb09d9 --- /dev/null +++ b/mlir-tensorrt/build_tools/bazel/run_docker.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +docker build -f build_tools/bazel/Dockerfile \ + -t mlir-tensorrt:dev \ + --build-arg BASE_IMAGE=nvcr.io/nvidia/cuda:12.5.1-cudnn-devel-ubuntu22.04 \ + --build-arg GROUP=$(id -gn) \ + --build-arg GID=$(id -g) \ + --build-arg USER=$(id -un) \ + --build-arg UID=$(id -u) \ + . 
+ +docker run -it \ + -v "$(pwd)":"/opt/src/mlir-tensorrt" \ + -v "${HOME}/.cache/bazel":"${HOME}/.cache/bazel" \ + mlir-tensorrt:dev diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h index 5c6f5cbff..c74da6b11 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Registration/RegisterMlirTensorRtDialects.h @@ -66,7 +66,7 @@ #endif namespace mlir { - +#ifdef MLIR_TRT_ENABLE_EXECUTOR inline void registerAllMlirTensorRtExecutorDialects(DialectRegistry ®istry) { // Registration for executor dialect and all upstream dialects that can appear // in the host IR. @@ -84,6 +84,7 @@ inline void registerAllMlirTensorRtExecutorDialects(DialectRegistry ®istry) { tensor::registerSubsetOpInterfaceExternalModels(registry); scf::registerBufferizableOpInterfaceExternalModels(registry); } +#endif // MLIR_TRT_ENABLE_EXECUTOR inline void registerAllMlirTensorRtDialects(DialectRegistry ®istry) { registerCoreMlirTensorRtDialects(registry); diff --git a/mlir-tensorrt/deps.bzl b/mlir-tensorrt/deps.bzl new file mode 100644 index 000000000..dd2fb726d --- /dev/null +++ b/mlir-tensorrt/deps.bzl @@ -0,0 +1,104 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. 
+ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +def third_party_deps(): + LLVM_COMMIT = "c49770c60f26e449379447109f7d915bd8de0384" + LLVM_SHA256 = "d4de275d04babe573d8634b9b19c8397de370c6544c01afae121f19413bbfba2" + http_archive( + name = "llvm-raw", + build_file_content = "# empty", + sha256 = LLVM_SHA256, + strip_prefix = "llvm-project-" + LLVM_COMMIT, + urls = ["https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT)], + # patches = [ + # "//:build_tools/llvm-project.patch", + # ], + # patch_args = ["-p1"], + ) + + SKYLIB_VERSION = "1.3.0" + http_archive( + name = "bazel_skylib", + sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = SKYLIB_VERSION), + "https://github.com/bazelbuild/bazel-skylib/releases/download/{version}/bazel-skylib-{version}.tar.gz".format(version = SKYLIB_VERSION), + ], + ) + + http_archive( + name = "llvm_zstd", + build_file = "@llvm-raw//utils/bazel/third_party_build:zstd.BUILD", + sha256 = "7c42d56fac126929a6a85dbc73ff1db2411d04f104fae9bdea51305663a83fd0", + strip_prefix = "zstd-1.5.2", + urls = [ + "https://github.com/facebook/zstd/releases/download/v1.5.2/zstd-1.5.2.tar.gz", + ], + ) + + http_archive( + name = "llvm_zlib", + build_file = "@llvm-raw//utils/bazel/third_party_build:zlib-ng.BUILD", + sha256 = "e36bb346c00472a1f9ff2a0a4643e590a254be6379da7cddd9daeb9a7f296731", + strip_prefix = "zlib-ng-2.0.7", + urls = [ + "https://github.com/zlib-ng/zlib-ng/archive/refs/tags/2.0.7.zip", + ], + ) + + http_archive( + name = "tensorrt10_x86", + build_file = "//:third_party/tensorrt10_x86.BUILD", + sha256 = "885ba84087d9633e07cdaf76b022a99c7460fbe42b487cabec6524409af2591b", + strip_prefix = "TensorRT-10.2.0.19", + url = 
"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-12.5.tar.gz", + ) + + RULES_CUDA_COMMIT = "a3e87114b41f78373f916ce1021183943c6057e9" + RULES_CUDA_SHA256 = "eb40d2ecabbd4dac8c13534cd3b97d7f9c8fb4aa2ae8bf6c1cc2c8b31bfaede9" + http_archive( + name = "rules_cuda", + sha256 = RULES_CUDA_SHA256, + strip_prefix = "rules_cuda-" + RULES_CUDA_COMMIT, + urls = ["https://github.com/bazel-contrib/rules_cuda/archive/{commit}.tar.gz".format(commit = RULES_CUDA_COMMIT)], + ) + + RULES_PYTHON_VERSION = "0.29.0" + RULES_PYTHON_SHA256 = "d71d2c67e0bce986e1c5a7731b4693226867c45bfe0b7c5e0067228a536fc580" + http_archive( + name = "rules_python", + sha256 = RULES_PYTHON_SHA256, + strip_prefix = "rules_python-{}".format(RULES_PYTHON_VERSION), + url = "https://github.com/bazelbuild/rules_python/releases/download/{}/rules_python-{}.tar.gz".format(RULES_PYTHON_VERSION, RULES_PYTHON_VERSION), + ) + + http_archive( + name = "io_bazel_rules_go", + sha256 = "6dc2da7ab4cf5d7bfc7c949776b1b7c733f05e56edc4bcd9022bb249d2e2a996", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip", + "https://github.com/bazelbuild/rules_go/releases/download/v0.39.1/rules_go-v0.39.1.zip", + ], + ) + + http_archive( + name = "bazel_gazelle", + sha256 = "727f3e4edd96ea20c29e8c2ca9e8d2af724d8c7778e7923a854b2c80952bc405", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-gazelle/releases/download/v0.30.0/bazel-gazelle-v0.30.0.tar.gz", + "https://github.com/bazelbuild/bazel-gazelle/releases/download/v0.30.0/bazel-gazelle-v0.30.0.tar.gz", + ], + ) + + http_archive( + name = "com_google_protobuf", + sha256 = "3bd7828aa5af4b13b99c191e8b1e884ebfa9ad371b0ce264605d347f135d2568", + strip_prefix = "protobuf-3.19.4", + urls = [ + "https://github.com/protocolbuffers/protobuf/archive/v3.19.4.tar.gz", + ], + ) diff --git a/mlir-tensorrt/requirements.txt 
b/mlir-tensorrt/requirements.txt new file mode 100644 index 000000000..bdf478f79 --- /dev/null +++ b/mlir-tensorrt/requirements.txt @@ -0,0 +1,3 @@ +nvidia-ml-py +click +numpy \ No newline at end of file diff --git a/mlir-tensorrt/requirements_lock.txt b/mlir-tensorrt/requirements_lock.txt new file mode 100644 index 000000000..95d99ca35 --- /dev/null +++ b/mlir-tensorrt/requirements_lock.txt @@ -0,0 +1,69 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# bazel run //:requirements.update +# +click==8.1.7 \ + --hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \ + --hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de + # via -r requirements.txt +numpy==2.1.2 \ + --hash=sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8 \ + --hash=sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466 \ + --hash=sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35 \ + --hash=sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c \ + --hash=sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4 \ + --hash=sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6 \ + --hash=sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0 \ + --hash=sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7 \ + --hash=sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a \ + --hash=sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a \ + --hash=sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e \ + --hash=sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62 \ + --hash=sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2 \ + --hash=sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5 \ + 
--hash=sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee \ + --hash=sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe \ + --hash=sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a \ + --hash=sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e \ + --hash=sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf \ + --hash=sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c \ + --hash=sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3 \ + --hash=sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86 \ + --hash=sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df \ + --hash=sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98 \ + --hash=sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d \ + --hash=sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2 \ + --hash=sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146 \ + --hash=sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550 \ + --hash=sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8 \ + --hash=sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb \ + --hash=sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e \ + --hash=sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d \ + --hash=sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366 \ + --hash=sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0 \ + --hash=sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db \ + --hash=sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe \ + --hash=sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426 \ + --hash=sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952 \ + 
--hash=sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03 \ + --hash=sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f \ + --hash=sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7 \ + --hash=sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b \ + --hash=sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17 \ + --hash=sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5 \ + --hash=sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1 \ + --hash=sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142 \ + --hash=sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884 \ + --hash=sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a \ + --hash=sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9 \ + --hash=sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445 \ + --hash=sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1 \ + --hash=sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1 \ + --hash=sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648 + # via -r requirements.txt +nvidia-ml-py==12.560.30 \ + --hash=sha256:f0254dc7400647680a072ee02509bfd46102b60bdfeca321576d4d4817e7fe97 \ + --hash=sha256:fea371c94d63e38a611c17bbb85fe400e9c8ddb9e8684a9cd0e47786a4bc3c73 + # via -r requirements.txt diff --git a/mlir-tensorrt/tensorrt/tensorrt-opt/tensorrt-opt.cpp b/mlir-tensorrt/tensorrt/tensorrt-opt/tensorrt-opt.cpp index b7c226734..42a890082 100644 --- a/mlir-tensorrt/tensorrt/tensorrt-opt/tensorrt-opt.cpp +++ b/mlir-tensorrt/tensorrt/tensorrt-opt/tensorrt-opt.cpp @@ -34,10 +34,12 @@ #include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "mlir/Transforms/Passes.h" +#ifdef MLIR_TRT_ENABLE_TESTING namespace mlir { void registerTestTensorKindAnalysisPass(); void 
registerTestTensorRTShapeInferencePass(); } // namespace mlir +#endif // MLIR_TRT_ENABLE_TESTING int main(int argc, char **argv) { mlir::DialectRegistry registry; @@ -45,8 +47,10 @@ int main(int argc, char **argv) { mlir::tensor::TensorDialect, mlir::arith::ArithDialect, mlir::affine::AffineDialect, mlir::quant::QuantizationDialect, mlir::scf::SCFDialect>(); +#ifdef MLIR_TRT_ENABLE_TESTING mlir::registerTestTensorKindAnalysisPass(); mlir::registerTestTensorRTShapeInferencePass(); +#endif // MLIR_TRT_ENABLE_TESTING mlir::func::registerInlinerExtension(registry); mlir::tensorrt::registerTensorRTTranslationCLOpts(); mlir::tensorrt::registerTensorRTPasses(); diff --git a/mlir-tensorrt/tensorrt/test/BUILD b/mlir-tensorrt/tensorrt/test/BUILD new file mode 100644 index 000000000..5d6659eda --- /dev/null +++ b/mlir-tensorrt/tensorrt/test/BUILD @@ -0,0 +1,69 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. + +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("@llvm-project//llvm:lit_test.bzl", "lit_test") +load("@pip_deps//:requirements.bzl", "requirement") + +expand_template( + name = "lit_site_cfg_py", + testonly = True, + out = "lit.site.cfg.py", + substitutions = { + "@LIT_SITE_CFG_IN_HEADER@": "# Autogenerated, do not edit.", + "\"@LLVM_TOOLS_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'llvm-project', 'llvm')", + "\"@MLIR_TENSORRT_DIALECT_BINARY_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir')", + "\"@MLIR_TENSORRT_DIALECT_SOURCE_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir', 'tensorrt')", + "@MLIR_ENABLE_BINDINGS_PYTHON@": "False", + "@MLIR_TRT_DIALECT_TARGET_TENSORRT@": "True", + "@ENABLE_ASAN@": "False", + }, + template = "lit.site.cfg.py.in", +) + +# Common data used by most lit tests. 
+filegroup(
+    name = "lit_data",
+    testonly = True,
+    data = [
+        "gpu_tools.py",
+        "lit.cfg.py",
+        "lit.site.cfg.py",
+        "@llvm-project//llvm:FileCheck",
+        "@llvm-project//llvm:count",
+        "@llvm-project//llvm:not",
+    ],
+)
+
+# One lit_test per .mlir test file in Dialect/TensorRT.
+[
+    lit_test(
+        name = "%s.test" % src,
+        srcs = [src],
+        data = [
+            "//:tensorrt-opt",
+            "//tensorrt/test:lit_data",
+        ],
+        deps = [
+            requirement("nvidia-ml-py"),
+            requirement("click"),
+            requirement("numpy"),
+        ],
+        tags = ["tensorrt_mlir_dialect_tests"],
+    )
+    for src in glob(
+        [
+            "Dialect/TensorRT/*.mlir",
+        ],
+        exclude = [
+            # exclude MLIR_TRT_ENABLE_TESTING related tests.
+            # NOTE: glob exclude patterns are package-relative and must match the
+            # include pattern's path; the bare "TensorRT/..." form matched nothing.
+            "Dialect/TensorRT/tensor-kind-analysis.mlir",
+        ],
+    )
+]
+
+test_suite(
+    name = "tensorrt_mlir_dialect_tests",
+    tags = ["tensorrt_mlir_dialect_tests"],
+)
diff --git a/mlir-tensorrt/tensorrt/test/gpu_tools.py b/mlir-tensorrt/tensorrt/test/gpu_tools.py
new file mode 100644
index 000000000..a5488d634
--- /dev/null
+++ b/mlir-tensorrt/tensorrt/test/gpu_tools.py
@@ -0,0 +1,131 @@
+"""Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+
+This file contains functions and a CLI implementation for querying available
+GPUs, selecting a GPU for running a test workload, and estimating the number of
+tests which should be allowed to run in parallel.
+"""
+
+from contextlib import contextmanager
+from typing import List, Tuple
+
+import click
+import numpy as np
+from pynvml import *
+
+
+def get_uniform_devices() -> List[int]:
+    """Returns a list of device IDs matching the highest SM version
+    of all devices on the system.
+ """ + deviceCount = nvmlDeviceGetCount() + sm_versions = [] + for i in range(deviceCount): + handle = nvmlDeviceGetHandleByIndex(i) + cc = nvmlDeviceGetCudaComputeCapability(handle) + sm_versions.append(float(f"{cc[0]}.{cc[1]}")) + if len(sm_versions) == 0: + return [] + sm_versions = np.asarray(sm_versions) + max_version = sm_versions.max() + if not np.all(sm_versions == max_version): + return np.flatnonzero(sm_versions == max_version).tolist() + return list(x for x in range(deviceCount)) + + +def get_sm_version() -> Tuple[int, int]: + """Returns the largest/latest SM version among all devices on the host.""" + deviceCount = nvmlDeviceGetCount() + version = (0, 0) + for i in range(deviceCount): + handle = nvmlDeviceGetHandleByIndex(i) + cc = nvmlDeviceGetCudaComputeCapability(handle) + cc = (cc[0], cc[1]) + if cc > version: + version = cc + return version + + +@contextmanager +def nvml_context(*args, **kwargs): + """A context manager that handles NVML init and shutdown. Yields the + uniform devices list when entered into. + """ + nvmlInit() + try: + devices = get_uniform_devices() + yield devices + finally: + nvmlShutdown() + + +def get_stats(devices: List[int]) -> Tuple[List[float], List[float], List[float]]: + """Returns lists of available memory, GPU utilization rate, and GPU memory utilization rate""" + avail_mem_gb = [] + gpu_rates = [] + mem_rates = [] + for i in devices: + handle = nvmlDeviceGetHandleByIndex(i) + info = nvmlDeviceGetMemoryInfo(handle) + avail_mem_gb.append(float(info.free) / (1024.0 * 1024.0 * 1024.0)) + util_rates = nvmlDeviceGetUtilizationRates(handle) + gpu_rates.append(util_rates.gpu) + mem_rates.append(util_rates.memory) + return avail_mem_gb, gpu_rates, mem_rates + + +def select_device(devices: List[int]) -> int: + """Selects the device (that is among those with the highest SM version + if SM versions are not uniform) that has the most available GPU memory. 
+ """ + assert len(devices) > 0 + avail_mem_gb, _, _ = get_stats(devices) + + # All devices have same SM version. + # Check utilization rates. + max_mem = int(np.argmax(avail_mem_gb)) + return max_mem + + +def estimate_parallelism_from_memory(devices: List[int], required_mem: float) -> int: + """Retrieves the sum total of free GPU memory across eligible devices and + divides by the required GB of GPU memory for a workload to yield the estimated + number of (single device) workloads that should be OK to run in parallel without + exhausting the available memory. + """ + if len(devices) == 0: + return 1 + mem_gb, _, _ = get_stats(devices) + avail_gb = sum(mem_gb) + return int(avail_gb / required_mem) + + +def has_fp8_support(): + """Returns True if the devices support FP8""" + return get_sm_version() >= (8, 9) + + +@click.group() +def cli(): + pass + + +@cli.command("pick-device") +def pick_device(): + with nvml_context() as devices: + if len(devices) == 0: + return + print(select_device(devices)) + return + + +@cli.command("get-parallelism") +@click.option( + "--required-mem", help="required GPU memory in GB", default=1.0, type=click.FLOAT +) +def get_parallelism(required_mem: float): + with nvml_context() as devices: + print(estimate_parallelism_from_memory(devices, required_mem)) + + +if __name__ == "__main__": + cli() diff --git a/mlir-tensorrt/tensorrt/test/lit.cfg.py b/mlir-tensorrt/tensorrt/test/lit.cfg.py index 0b717b09f..48d6e7b55 100644 --- a/mlir-tensorrt/tensorrt/test/lit.cfg.py +++ b/mlir-tensorrt/tensorrt/test/lit.cfg.py @@ -36,9 +36,7 @@ # subdirectories contain auxiliary inputs for various tests in their parent # directories. 
config.excludes = ["Inputs", "Examples", "CMakeLists.txt", "README.txt", "LICENSE.txt"] -config.tensorrt_dialect_tools_dir = os.path.join( - config.tensorrt_dialect_obj_root, "bin" -) +config.tensorrt_dialect_tools_dir = os.path.join(config.tensorrt_dialect_obj_root) config.tensorrt_dialect_libs_dir = os.path.join(config.tensorrt_dialect_obj_root, "lib") config.substitutions.append( ("%tensorrt_dialect_libs", config.tensorrt_dialect_libs_dir) diff --git a/mlir-tensorrt/tensorrt/test/lit.site.cfg.py.in b/mlir-tensorrt/tensorrt/test/lit.site.cfg.py.in index 9f7491ced..74002d4d9 100644 --- a/mlir-tensorrt/tensorrt/test/lit.site.cfg.py.in +++ b/mlir-tensorrt/tensorrt/test/lit.site.cfg.py.in @@ -18,7 +18,7 @@ config.gpu_tools_package_path = os.path.join( ) config.gpu_tools_script = os.path.join( "@MLIR_TENSORRT_DIALECT_SOURCE_DIR@", - "../python/mlir_tensorrt_tools/mlir_tensorrt/tools/gpu_tools.py", + "test/gpu_tools.py", ) def load_gpu_tools_module(): @@ -39,5 +39,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@MLIR_TENSORRT_DIALECT_SOURCE_DIR@/test/lit.cfg.py") - +lit_config.load_config(config, "@MLIR_TENSORRT_DIALECT_SOURCE_DIR@" + "/test/lit.cfg.py") diff --git a/mlir-tensorrt/test/BUILD b/mlir-tensorrt/test/BUILD new file mode 100644 index 000000000..58705aad0 --- /dev/null +++ b/mlir-tensorrt/test/BUILD @@ -0,0 +1,81 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. 
+ +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("@llvm-project//llvm:lit_test.bzl", "lit_test") +load("@pip_deps//:requirements.bzl", "requirement") +load("//:version.bzl", "MLIR_TENSORRT_VERSION") + +expand_template( + name = "lit_site_cfg_py", + testonly = True, + out = "lit.site.cfg.py", + substitutions = { + "@LIT_SITE_CFG_IN_HEADER@": "# Autogenerated, do not edit.", + "\"@TENSORRT_MLIR_SOURCE_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir')", + "\"@TENSORRT_MLIR_BINARY_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir')", + "\"@LLVM_TOOLS_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'llvm-project', 'llvm')", + "\"@CMAKE_SOURCE_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir')", + "\"@CMAKE_BINARY_DIR@\"": "os.path.join(os.environ['TEST_SRCDIR'], 'tensorrt-mlir')", + "@MLIR_TRT_TENSORRT_VERSION@": "{}".format(MLIR_TENSORRT_VERSION), + "@MLIR_TRT_ENABLE_PYTHON@": "False", + "@MLIR_TRT_ENABLE_ASSERTIONS@": "True", + "@MLIR_TRT_TARGET_CPP@": "False", + "@MLIR_TRT_TARGET_LUA@": "False", + "@MLIR_TRT_ENABLE_NCCL@": "False", + "@MLIR_TRT_ENABLE_EXECUTOR@": "False", + "@MLIR_TRT_ENABLE_HLO@": "False", + "@MLIR_TRT_TARGET_TENSORRT@": "True", + "@MLIR_TRT_WITH_ASAN@": "True", + "@ENABLE_ASAN@": "True", + }, + template = "lit.site.cfg.py.in", +) + +# Common data used by most lit tests. 
+filegroup( + name = "lit_data", + testonly = True, + data = [ + "gpu_tools.py", + "lit.cfg.py", + "lit.site.cfg.py", + "@llvm-project//llvm:FileCheck", + "@llvm-project//llvm:count", + "@llvm-project//llvm:not", + ], +) + +[ + lit_test( + name = "%s.test" % src, + srcs = [src], + data = [ + "//:mlir-tensorrt-opt", + "//test:lit_data", + ], + tags = ["lit_tests"], + deps = [ + requirement("nvidia-ml-py"), + requirement("click"), + requirement("numpy"), + ], + ) + for src in glob([ + "Dialect/**/*.mlir", + ]) +] + +test_suite( + name = "lit_tests", + tags = ["lit_tests"], +) + +test_suite( + name = "all_tests", + tests = [ + "//test:lit_tests", + ], +) diff --git a/mlir-tensorrt/test/gpu_tools.py b/mlir-tensorrt/test/gpu_tools.py new file mode 100644 index 000000000..a5488d634 --- /dev/null +++ b/mlir-tensorrt/test/gpu_tools.py @@ -0,0 +1,131 @@ +"""Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. + +This file contains functions and a CLI implementation for querying available +GPUs, selecting a GPU for running a test workload, and estimating the number of +tests which should be allowed to run in parallel. +""" + +from contextlib import contextmanager +from typing import List, Tuple + +import click +import numpy as np +from pynvml import * + + +def get_uniform_devices() -> List[int]: + """Returns a list of device IDs matching the highest SM version + of all devices on the system. 
+ """ + deviceCount = nvmlDeviceGetCount() + sm_versions = [] + for i in range(deviceCount): + handle = nvmlDeviceGetHandleByIndex(i) + cc = nvmlDeviceGetCudaComputeCapability(handle) + sm_versions.append(float(f"{cc[0]}.{cc[1]}")) + if len(sm_versions) == 0: + return [] + sm_versions = np.asarray(sm_versions) + max_version = sm_versions.max() + if not np.all(sm_versions == max_version): + return np.flatnonzero(sm_versions == max_version).tolist() + return list(x for x in range(deviceCount)) + + +def get_sm_version() -> Tuple[int, int]: + """Returns the largest/latest SM version among all devices on the host.""" + deviceCount = nvmlDeviceGetCount() + version = (0, 0) + for i in range(deviceCount): + handle = nvmlDeviceGetHandleByIndex(i) + cc = nvmlDeviceGetCudaComputeCapability(handle) + cc = (cc[0], cc[1]) + if cc > version: + version = cc + return version + + +@contextmanager +def nvml_context(*args, **kwargs): + """A context manager that handles NVML init and shutdown. Yields the + uniform devices list when entered into. + """ + nvmlInit() + try: + devices = get_uniform_devices() + yield devices + finally: + nvmlShutdown() + + +def get_stats(devices: List[int]) -> Tuple[List[float], List[float], List[float]]: + """Returns lists of available memory, GPU utilization rate, and GPU memory utilization rate""" + avail_mem_gb = [] + gpu_rates = [] + mem_rates = [] + for i in devices: + handle = nvmlDeviceGetHandleByIndex(i) + info = nvmlDeviceGetMemoryInfo(handle) + avail_mem_gb.append(float(info.free) / (1024.0 * 1024.0 * 1024.0)) + util_rates = nvmlDeviceGetUtilizationRates(handle) + gpu_rates.append(util_rates.gpu) + mem_rates.append(util_rates.memory) + return avail_mem_gb, gpu_rates, mem_rates + + +def select_device(devices: List[int]) -> int: + """Selects the device (that is among those with the highest SM version + if SM versions are not uniform) that has the most available GPU memory. 
+ """ + assert len(devices) > 0 + avail_mem_gb, _, _ = get_stats(devices) + + # All devices have same SM version. + # Check utilization rates. + max_mem = int(np.argmax(avail_mem_gb)) + return max_mem + + +def estimate_parallelism_from_memory(devices: List[int], required_mem: float) -> int: + """Retrieves the sum total of free GPU memory across eligible devices and + divides by the required GB of GPU memory for a workload to yield the estimated + number of (single device) workloads that should be OK to run in parallel without + exhausting the available memory. + """ + if len(devices) == 0: + return 1 + mem_gb, _, _ = get_stats(devices) + avail_gb = sum(mem_gb) + return int(avail_gb / required_mem) + + +def has_fp8_support(): + """Returns True if the devices support FP8""" + return get_sm_version() >= (8, 9) + + +@click.group() +def cli(): + pass + + +@cli.command("pick-device") +def pick_device(): + with nvml_context() as devices: + if len(devices) == 0: + return + print(select_device(devices)) + return + + +@cli.command("get-parallelism") +@click.option( + "--required-mem", help="required GPU memory in GB", default=1.0, type=click.FLOAT +) +def get_parallelism(required_mem: float): + with nvml_context() as devices: + print(estimate_parallelism_from_memory(devices, required_mem)) + + +if __name__ == "__main__": + cli() diff --git a/mlir-tensorrt/test/lit.cfg.py b/mlir-tensorrt/test/lit.cfg.py index 86127d9bc..fb4342bee 100644 --- a/mlir-tensorrt/test/lit.cfg.py +++ b/mlir-tensorrt/test/lit.cfg.py @@ -31,7 +31,7 @@ config.test_source_root = os.path.dirname(__file__) config.gpu_tools_script = os.path.join( config.test_source_root, - "../python/mlir_tensorrt_tools/mlir_tensorrt/tools/gpu_tools.py", + "gpu_tools.py", ) @@ -59,7 +59,7 @@ def estimate_paralllelism(mem_required: float) -> int: config.test_exec_root = os.path.join(config.mlir_tensorrt_obj_root, "test") # mlir_tensorrt_tools_dir: binary output path for tool executables -config.mlir_tensorrt_tools_dir = 
os.path.join(config.mlir_tensorrt_obj_root, "bin") +config.mlir_tensorrt_tools_dir = os.path.join(config.mlir_tensorrt_obj_root) # Add additional expansions that can be used in the `RUN:` commands. config.substitutions.append(("%PATH%", config.environment["PATH"])) diff --git a/mlir-tensorrt/test/lit.site.cfg.py.in b/mlir-tensorrt/test/lit.site.cfg.py.in index e307dadb8..2e12b1de4 100644 --- a/mlir-tensorrt/test/lit.site.cfg.py.in +++ b/mlir-tensorrt/test/lit.site.cfg.py.in @@ -31,4 +31,4 @@ import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. -lit_config.load_config(config, "@CMAKE_SOURCE_DIR@/test/lit.cfg.py") +lit_config.load_config(config, "@CMAKE_SOURCE_DIR@" + "/test/lit.cfg.py") diff --git a/mlir-tensorrt/third_party/tensorrt10_x86.BUILD b/mlir-tensorrt/third_party/tensorrt10_x86.BUILD new file mode 100644 index 000000000..b0cb70bb4 --- /dev/null +++ b/mlir-tensorrt/third_party/tensorrt10_x86.BUILD @@ -0,0 +1,36 @@ +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Also available under a BSD-style license. See LICENSE. 
+package(default_visibility = ["//visibility:public"])
+
+# Wraps the prebuilt TensorRT 10.2.0 Linux x86_64 tarball (headers + shared
+# libraries) as a single cc_library target for consumption by Bazel targets.
+cc_library(
+    name = "tensorrt10",
+    srcs = [
+        # This dep throws an error:
+        # bazel-bin/mlir-tensorrt-opt: error while loading shared libraries: do_not_link_against_nvinfer_builder_resource: cannot open shared object file: No such file or directory
+        # "lib/libnvinfer_builder_resource.so.10.2.0",
+        "lib/libnvinfer.so.10.2.0",
+        "lib/libnvinfer_plugin.so.10.2.0",
+        "lib/libnvinfer_dispatch.so.10.2.0",
+        "lib/libnvinfer_lean.so.10.2.0",
+        "lib/libnvinfer_vc_plugin.so.10.2.0",
+        "lib/libnvonnxparser.so.10.2.0",
+    ],
+    hdrs = [
+        "include/NvInfer.h",
+        "include/NvInferConsistency.h",
+        "include/NvInferConsistencyImpl.h",
+        "include/NvInferImpl.h",
+        "include/NvInferLegacyDims.h",
+        "include/NvInferPlugin.h",
+        "include/NvInferPluginUtils.h",
+        "include/NvInferRuntime.h",
+        "include/NvInferRuntimeBase.h",
+        "include/NvInferRuntimeCommon.h",
+        "include/NvInferRuntimePlugin.h",
+        "include/NvInferSafeRuntime.h",
+        "include/NvInferVersion.h",
+    ],
+    includes = ["include"],
+)
diff --git a/mlir-tensorrt/version.bzl b/mlir-tensorrt/version.bzl
new file mode 100644
index 000000000..d56d0a6a7
--- /dev/null
+++ b/mlir-tensorrt/version.bzl
@@ -0,0 +1,6 @@
+# version.bzl
+
+# Project version components; keep in sync with the CMake project() version.
+MLIR_TENSORRT_VERSION_MAJOR = "0"
+MLIR_TENSORRT_VERSION_MINOR = "1"
+MLIR_TENSORRT_VERSION_PATCH = "34"
+# Full "major.minor.patch" string consumed by //test:lit_site_cfg_py.
+MLIR_TENSORRT_VERSION = "{}.{}.{}".format(MLIR_TENSORRT_VERSION_MAJOR, MLIR_TENSORRT_VERSION_MINOR, MLIR_TENSORRT_VERSION_PATCH)