From 6af3eef5c7279e5fd9d8625e0ea6c463bec1c261 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 15 Aug 2024 16:29:07 -0400
Subject: [PATCH 001/183] Setup latest dependency for new bufferization passes

---
 .dep-versions                 | 2 +-
 frontend/catalyst/__init__.py | 2 +-
 mlir/Makefile                 | 2 +-
 mlir/llvm-project             | 2 +-
 mlir/mlir-hlo                 | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.dep-versions b/.dep-versions
index 5480b17f2f..bfd7917e8f 100644
--- a/.dep-versions
+++ b/.dep-versions
@@ -1,5 +1,5 @@
 # Always update the version check in catalyst.__init__ when changing the JAX version.
-jax=0.4.28
+jax=0.4.31
 mhlo=89a891c986650c33df76885f5620e0a92150d90f
 llvm=3a8316216807d64a586b971f51695e23883331f7
 enzyme=v0.0.130
diff --git a/frontend/catalyst/__init__.py b/frontend/catalyst/__init__.py
index af9d10d1d3..aeac5ffd69 100644
--- a/frontend/catalyst/__init__.py
+++ b/frontend/catalyst/__init__.py
@@ -23,7 +23,7 @@
 
 import jaxlib as _jaxlib
 
-_jaxlib_version = "0.4.28"
+_jaxlib_version = "0.4.31"
 if _jaxlib.__version__ != _jaxlib_version:
     import warnings
 
diff --git a/mlir/Makefile b/mlir/Makefile
index 1d5a126ef6..797f75abd2 100644
--- a/mlir/Makefile
+++ b/mlir/Makefile
@@ -118,7 +118,7 @@ enzyme:
 		-DCMAKE_CXX_VISIBILITY_PRESET=$(SYMBOL_VISIBILITY) \
 		-DCMAKE_POLICY_DEFAULT_CMP0116=NEW
 
-	cmake --build $(ENZYME_BUILD_DIR) --target EnzymeStatic-19
+	cmake --build $(ENZYME_BUILD_DIR) --target EnzymeStatic-20
 
 .PHONY: dialects
 dialects:
diff --git a/mlir/llvm-project b/mlir/llvm-project
index 3a83162168..51d4980a13 160000
--- a/mlir/llvm-project
+++ b/mlir/llvm-project
@@ -1 +1 @@
-Subproject commit 3a8316216807d64a586b971f51695e23883331f7
+Subproject commit 51d4980a133db12888207698e39c469cb7055cac
diff --git a/mlir/mlir-hlo b/mlir/mlir-hlo
index 89a891c986..1d15157654 160000
--- a/mlir/mlir-hlo
+++ b/mlir/mlir-hlo
@@ -1 +1 @@
-Subproject commit 89a891c986650c33df76885f5620e0a92150d90f
+Subproject commit 1d151576543aebf134f76ae1ebf86da11623b912

From 951ef36784e4bb33e35b0bbc11da9c1483ac7971 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 15 Aug 2024 16:30:43 -0400
Subject: [PATCH 002/183] enableRegionSimplification accepts
 GreedySimplifyRegionLevel type instead of bool

---
 mlir/lib/Catalyst/Transforms/DetectQNodes.cpp        | 2 +-
 mlir/lib/Quantum/Transforms/emit_catalyst_pyface.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp b/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
index 6acdb8c2bf..242421da81 100644
--- a/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
+++ b/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
@@ -889,7 +889,7 @@ struct AddExceptionHandlingPass : impl::AddExceptionHandlingPassBase<AddExceptio
        patterns.add<DetectQnodeTransform>(context);
         GreedyRewriteConfig config;
         config.strictMode = GreedyRewriteStrictness::ExistingOps;
-        config.enableRegionSimplification = false;
+        config.enableRegionSimplification = GreedySimplifyRegionLevel::Disabled;
         config.maxIterations = 1;
 
         auto op = getOperation();
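Note on PATCH 002: with the LLVM bump in PATCH 001, GreedyRewriteConfig::enableRegionSimplification is no longer a bool but a GreedySimplifyRegionLevel enum (Disabled, Normal, Aggressive). A minimal sketch of the new usage follows; the enum and config fields are upstream MLIR API, but the wrapper function itself is hypothetical, not Catalyst code:

    #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

    // Sketch: configure the greedy rewrite driver against the updated API.
    void configureGreedyDriver(mlir::GreedyRewriteConfig &config) {
        // Previously: config.enableRegionSimplification = false;
        config.enableRegionSimplification = mlir::GreedySimplifyRegionLevel::Disabled;
        config.maxIterations = 1;
    }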
From c8336bdcf80395fde64b34d2fce26ab9c3609414 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 15 Aug 2024 16:36:06 -0400
Subject: [PATCH 003/183] translateModuleToLLVMIR needs an extra
 disableVerification parameter

---
 mlir/lib/Driver/CompilerDriver.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp
index 8b775667c8..af19b09038 100644
--- a/mlir/lib/Driver/CompilerDriver.cpp
+++ b/mlir/lib/Driver/CompilerDriver.cpp
@@ -662,8 +662,9 @@ LogicalResult QuantumDriverMain(const CompilerOptions &options, CompilerOutput &
     outIRStream << *op;
 
     if (options.lowerToLLVM) {
-        llvmModule = timer::timer(translateModuleToLLVMIR, "translateModuleToLLVMIR",
-                                  /* add_endl */ false, *op, llvmContext, "LLVMDialectModule");
+        llvmModule =
+            timer::timer(translateModuleToLLVMIR, "translateModuleToLLVMIR",
+                         /* add_endl */ false, *op, llvmContext, "LLVMDialectModule", false);
         if (!llvmModule) {
             CO_MSG(options, Verbosity::Urgent, "Failed to translate LLVM module\n");
             return failure();

From 21e5933c94ce7615f6468ae3ff8246f13fafc28f Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 15 Aug 2024 17:10:53 -0400
Subject: [PATCH 004/183] Replace equals with compare for llvm::StringRef

---
 mlir/lib/Catalyst/Transforms/AsyncUtils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
index e86cbb9c7d..28026e009b 100644
--- a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
+++ b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
@@ -207,7 +207,7 @@ std::optional<LLVM::LLVMFuncOp> AsyncUtils::getCalleeSafe(LLVM::CallOp callOp)
 bool AsyncUtils::isFunctionNamed(LLVM::LLVMFuncOp funcOp, llvm::StringRef expectedName)
 {
     llvm::StringRef observedName = funcOp.getSymName();
-    return observedName.equals(expectedName);
+    return observedName.compare(expectedName) == 0;
 }
 
 bool AsyncUtils::isMlirAsyncRuntimeCreateValue(LLVM::LLVMFuncOp funcOp)
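Note on PATCH 004 (and PATCH 007 below): llvm::StringRef::equals was deprecated upstream and later removed, which is why the comparison is migrated first to compare() == 0 and then to operator==. A self-contained sketch of the equivalent forms (illustrative only; the function name shown is one of Catalyst's async runtime symbols):

    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    int main() {
        llvm::StringRef observed("mlirAsyncRuntimeCreateValue");
        llvm::StringRef expected("mlirAsyncRuntimeCreateValue");
        // equals() is unavailable in newer LLVM; these two forms are equivalent.
        assert(observed.compare(expected) == 0);
        assert(observed == expected);
        return 0;
    }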
From 9e6ae3f97e8c65409224c5d97df13a0148612518 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 16 Aug 2024 11:41:22 -0400
Subject: [PATCH 005/183] Keep using jax 0.4.28 but update llvm (from jax
 0.4.29)

---
 .dep-versions                 | 6 +++---
 frontend/catalyst/__init__.py | 2 +-
 mlir/llvm-project             | 2 +-
 mlir/mlir-hlo                 | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.dep-versions b/.dep-versions
index bfd7917e8f..992c9ff241 100644
--- a/.dep-versions
+++ b/.dep-versions
@@ -1,7 +1,7 @@
 # Always update the version check in catalyst.__init__ when changing the JAX version.
-jax=0.4.31
-mhlo=89a891c986650c33df76885f5620e0a92150d90f
-llvm=3a8316216807d64a586b971f51695e23883331f7
+jax=0.4.28
+mhlo=39c37c43fb9db18144f2e155a0fe65864646a968
+llvm=6f2c61071c274a1b5e212e6ad4114641ec7c7fc3
 enzyme=v0.0.130
 
 # Always remove custom PL/LQ versions before release.
diff --git a/frontend/catalyst/__init__.py b/frontend/catalyst/__init__.py
index aeac5ffd69..af9d10d1d3 100644
--- a/frontend/catalyst/__init__.py
+++ b/frontend/catalyst/__init__.py
@@ -23,7 +23,7 @@
 
 import jaxlib as _jaxlib
 
-_jaxlib_version = "0.4.31"
+_jaxlib_version = "0.4.28"
 if _jaxlib.__version__ != _jaxlib_version:
     import warnings
 
diff --git a/mlir/llvm-project b/mlir/llvm-project
index 51d4980a13..6f2c61071c 160000
--- a/mlir/llvm-project
+++ b/mlir/llvm-project
@@ -1 +1 @@
-Subproject commit 51d4980a133db12888207698e39c469cb7055cac
+Subproject commit 6f2c61071c274a1b5e212e6ad4114641ec7c7fc3
diff --git a/mlir/mlir-hlo b/mlir/mlir-hlo
index 1d15157654..39c37c43fb 160000
--- a/mlir/mlir-hlo
+++ b/mlir/mlir-hlo
@@ -1 +1 @@
-Subproject commit 1d151576543aebf134f76ae1ebf86da11623b912
+Subproject commit 39c37c43fb9db18144f2e155a0fe65864646a968

From 0cf462bcc03fd1a5a45c9ad3343217a737d5f74b Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 16 Aug 2024 11:42:17 -0400
Subject: [PATCH 006/183] Roll back EnzymeStatic to 19

---
 mlir/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/Makefile b/mlir/Makefile
index 797f75abd2..1d5a126ef6 100644
--- a/mlir/Makefile
+++ b/mlir/Makefile
@@ -118,7 +118,7 @@ enzyme:
 		-DCMAKE_CXX_VISIBILITY_PRESET=$(SYMBOL_VISIBILITY) \
 		-DCMAKE_POLICY_DEFAULT_CMP0116=NEW
 
-	cmake --build $(ENZYME_BUILD_DIR) --target EnzymeStatic-20
+	cmake --build $(ENZYME_BUILD_DIR) --target EnzymeStatic-19
 
 .PHONY: dialects
 dialects:

From 39b25378d190caa3992eb5433cc2adaef797cf5a Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 16 Aug 2024 12:06:54 -0400
Subject: [PATCH 007/183] Use == instead of compare

---
 mlir/lib/Catalyst/Transforms/AsyncUtils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
index 28026e009b..83dd657dae 100644
--- a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
+++ b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp
@@ -207,7 +207,7 @@ std::optional<LLVM::LLVMFuncOp> AsyncUtils::getCalleeSafe(LLVM::CallOp callOp)
 bool AsyncUtils::isFunctionNamed(LLVM::LLVMFuncOp funcOp, llvm::StringRef expectedName)
 {
     llvm::StringRef observedName = funcOp.getSymName();
-    return observedName.compare(expectedName) == 0;
+    return observedName == expectedName;
 }
 
 bool AsyncUtils::isMlirAsyncRuntimeCreateValue(LLVM::LLVMFuncOp funcOp)

From 5bad7413ea5ac1b418400513d677af45827d1c93 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 16 Aug 2024 12:07:26 -0400
Subject: [PATCH 008/183] Roll back enableRegionSimplification type

---
 mlir/lib/Catalyst/Transforms/DetectQNodes.cpp        | 2 +-
 mlir/lib/Quantum/Transforms/emit_catalyst_pyface.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp b/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
index 242421da81..6acdb8c2bf 100644
--- a/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
+++ b/mlir/lib/Catalyst/Transforms/DetectQNodes.cpp
@@ -889,7 +889,7 @@ struct AddExceptionHandlingPass : impl::AddExceptionHandlingPassBase<AddExceptio
        patterns.add<DetectQnodeTransform>(context);
         GreedyRewriteConfig config;
         config.strictMode = GreedyRewriteStrictness::ExistingOps;
-        config.enableRegionSimplification = GreedySimplifyRegionLevel::Disabled;
+        config.enableRegionSimplification = false;
         config.maxIterations = 1;
 
         auto op = getOperation();
From 4e9cc289d44eb1490583c6defca60bad0dae7866 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 16 Aug 2024 14:36:22 -0400
Subject: [PATCH 009/183] Comment out retired bufferization passes

---
 frontend/catalyst/compiler.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index c24ea994f6..e8635aaa23 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -198,13 +198,13 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "scf-bufferize",
         "convert-tensor-to-linalg",      # tensor.pad
         "convert-elementwise-to-linalg", # Must be run before --arith-bufferize
-        "arith-bufferize",
+        #"arith-bufferize",
         "empty-tensor-to-alloc-tensor",
-        "func.func(bufferization-bufferize)",
-        "func.func(tensor-bufferize)",
+        #"func.func(bufferization-bufferize)",
+        #"func.func(tensor-bufferize)",
         "catalyst-bufferize", # Must be run before -- func.func(linalg-bufferize)
-        "func.func(linalg-bufferize)",
-        "func.func(tensor-bufferize)",
+        #"func.func(linalg-bufferize)",
+        #"func.func(tensor-bufferize)",
         "quantum-bufferize",
         "func-bufferize",
         "func.func(finalizing-bufferize)",

From ad22787af4380ec920571bd672e07e03cc187433 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Tue, 20 Aug 2024 11:05:11 -0400
Subject: [PATCH 010/183] Temporarily disable finalizing-bufferization for
 debugging

---
 frontend/catalyst/compiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index e8635aaa23..d6d0e9bdb6 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -207,7 +207,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         #"func.func(tensor-bufferize)",
         "quantum-bufferize",
         "func-bufferize",
-        "func.func(finalizing-bufferize)",
+        #"func.func(finalizing-bufferize)",
         "canonicalize", # Remove dead memrefToTensorOp's
                         # introduced during gradient-bufferize of callbacks
         "func.func(buffer-hoisting)",

From 0852bccb57f5106a75fbe6d84c54aab8568aef88 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Tue, 20 Aug 2024 16:41:45 -0400
Subject: [PATCH 011/183] Reformat compiler.py

---
 frontend/catalyst/compiler.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 8bb86e2791..a13bd44194 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -199,16 +199,16 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "scf-bufferize",
         "convert-tensor-to-linalg",      # tensor.pad
         "convert-elementwise-to-linalg", # Must be run before --arith-bufferize
-        #"arith-bufferize",
+        # "arith-bufferize",
         "empty-tensor-to-alloc-tensor",
-        #"func.func(bufferization-bufferize)",
-        #"func.func(tensor-bufferize)",
+        # "func.func(bufferization-bufferize)",
+        # "func.func(tensor-bufferize)",
         "catalyst-bufferize", # Must be run before -- func.func(linalg-bufferize)
-        #"func.func(linalg-bufferize)",
-        #"func.func(tensor-bufferize)",
+        # "func.func(linalg-bufferize)",
+        # "func.func(tensor-bufferize)",
         "quantum-bufferize",
         "func-bufferize",
-        #"func.func(finalizing-bufferize)",
+        # "func.func(finalizing-bufferize)",
         "canonicalize", # Remove dead memrefToTensorOp's
                         # introduced during gradient-bufferize of callbacks
         "func.func(buffer-hoisting)",

From ffcd4d3eb64faef40a2690f0e6610dca66998e3a Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Tue, 20 Aug 2024 18:52:43 -0400
Subject: [PATCH 012/183] Remove all passes and use one-shot-bufferize only

---
 frontend/catalyst/compiler.py | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index a13bd44194..7accd85ee1 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -193,33 +193,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
 BUFFERIZATION_PASS = (
     "BufferizationPass",
     [
-        "one-shot-bufferize{dialect-filter=memref}",
-        "inline",
-        "gradient-bufferize",
-        "scf-bufferize",
-        "convert-tensor-to-linalg",      # tensor.pad
-        "convert-elementwise-to-linalg", # Must be run before --arith-bufferize
-        # "arith-bufferize",
-        "empty-tensor-to-alloc-tensor",
-        # "func.func(bufferization-bufferize)",
-        # "func.func(tensor-bufferize)",
-        "catalyst-bufferize", # Must be run before -- func.func(linalg-bufferize)
-        # "func.func(linalg-bufferize)",
-        # "func.func(tensor-bufferize)",
-        "quantum-bufferize",
-        "func-bufferize",
-        # "func.func(finalizing-bufferize)",
-        "canonicalize", # Remove dead memrefToTensorOp's
-                        # introduced during gradient-bufferize of callbacks
-        "func.func(buffer-hoisting)",
-        "func.func(buffer-loop-hoisting)",
-        "func.func(buffer-deallocation)",
-        "convert-arraylist-to-memref",
-        "convert-bufferization-to-memref",
-        "canonicalize", # Must be after convert-bufferization-to-memref
-                        # otherwise there are issues in lowering of dynamic tensors.
-        # "cse",
-        "cp-global-memref",
+        "one-shot-bufferize{bufferize-function-boundaries}",
     ],
 )
 
@@ -275,6 +249,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
     HLO_LOWERING_PASS,
     QUANTUM_COMPILATION_PASS,
     BUFFERIZATION_PASS,
+    #BUFFERIZATION_PASS2,
+    #BUFFERIZATION_PASS3,
     MLIR_TO_LLVM_PASS,
 ]
 
@@ -290,6 +266,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
     HLO_LOWERING_PASS,
     QUANTUM_COMPILATION_PASS,
     BUFFERIZATION_PASS,
+    #BUFFERIZATION_PASS2,
+    #BUFFERIZATION_PASS3,
     MLIR_TO_LLVM_ASYNC_PASS,
 ]

From 8b45ae8429410e19b4069d7deb5dc2d8b5e5dcd7 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 21 Aug 2024 09:59:48 -0400
Subject: [PATCH 013/183] Add new buffer passes except for the deallocation
 pipeline

---
 frontend/catalyst/compiler.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 7accd85ee1..862e202c0e 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -193,7 +193,14 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
 BUFFERIZATION_PASS = (
     "BufferizationPass",
     [
+        "eliminate-empty-tensors",
         "one-shot-bufferize{bufferize-function-boundaries}",
+        "func.func(buffer-hoisting)",
+        "func.func(buffer-loop-hoisting)",
+        "buffer-results-to-out-params",
+        "drop-equivalent-buffer-results",
+        "func.func(promote-buffers-to-stack)",
+        #"buffer-deallocation-pipeline",
     ],
 )

From 4d20def85cab458e7ddf297d9424c1573f109e01 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 21 Aug 2024 12:05:47 -0400
Subject: [PATCH 014/183] Temporarily set side effect of InitOp and FinalizeOp
 as zero

---
 frontend/catalyst/compiler.py         | 5 ++++-
 mlir/include/Quantum/IR/QuantumOps.td | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 862e202c0e..6bfef707ae 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -195,12 +195,15 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "eliminate-empty-tensors",
         "one-shot-bufferize{bufferize-function-boundaries}",
+        #"gradient-bufferize",
+        #"catalyst-bufferize",
+        #"quantum-bufferize",
         "func.func(buffer-hoisting)",
         "func.func(buffer-loop-hoisting)",
         "buffer-results-to-out-params",
         "drop-equivalent-buffer-results",
         "func.func(promote-buffers-to-stack)",
-        #"buffer-deallocation-pipeline",
+        "buffer-deallocation-pipeline",
     ],
 )
diff --git a/mlir/include/Quantum/IR/QuantumOps.td b/mlir/include/Quantum/IR/QuantumOps.td
index dd81b8f52d..10c4cd50f6 100644
--- a/mlir/include/Quantum/IR/QuantumOps.td
+++ b/mlir/include/Quantum/IR/QuantumOps.td
@@ -63,7 +63,7 @@ def NamedObservableAttr : EnumAttr<QuantumDialect, NamedObservable, "named_obser
 class Quantum_Op<string mnemonic, list<Trait> traits = []> : Op<QuantumDialect, mnemonic, traits>;
 
-def InitializeOp : Quantum_Op<"init"> {
+def InitializeOp : Quantum_Op<"init", [NoMemoryEffect]> {
     let summary = "Initialize the quantum runtime.";
 
     let assemblyFormat = [{
@@ -71,7 +71,7 @@ def InitializeOp : Quantum_Op<"init"> {
     }];
 }
 
-def FinalizeOp : Quantum_Op<"finalize"> {
+def FinalizeOp : Quantum_Op<"finalize", [NoMemoryEffect]> {
     let summary = "Teardown the quantum runtime.";
 
     let assemblyFormat = [{

From d9b38ba39cb71cca03ea69709600c25a4b5ae6e4 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 21 Aug 2024 15:42:46 -0400
Subject: [PATCH 015/183] Disable deallocation pipeline for debugging

---
 frontend/catalyst/compiler.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 6bfef707ae..862e202c0e 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -195,15 +195,12 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "eliminate-empty-tensors",
         "one-shot-bufferize{bufferize-function-boundaries}",
-        #"gradient-bufferize",
-        #"catalyst-bufferize",
-        #"quantum-bufferize",
         "func.func(buffer-hoisting)",
         "func.func(buffer-loop-hoisting)",
         "buffer-results-to-out-params",
         "drop-equivalent-buffer-results",
         "func.func(promote-buffers-to-stack)",
-        "buffer-deallocation-pipeline",
+        #"buffer-deallocation-pipeline",
     ],
 )
From 676053f9547e616ef4bdd01ad4966f7541606bf1 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 21 Aug 2024 15:43:41 -0400
Subject: [PATCH 016/183] Draft Quantum Impl of BufferizableOpInterface

---
 .../Quantum/Transforms/BufferizableOpInterfaceImpl.h | 12 ++++++++++++
 mlir/lib/Quantum/IR/QuantumDialect.cpp               |  3 +++
 .../Transforms/BufferizableOpInterfaceImpl.cpp       | 11 +++++++++++
 mlir/lib/Quantum/Transforms/CMakeLists.txt           |  1 +
 4 files changed, 27 insertions(+)
 create mode 100644 mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
 create mode 100644 mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp

diff --git a/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
new file mode 100644
index 0000000000..069f4b3ee4
--- /dev/null
+++ b/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
@@ -0,0 +1,12 @@
+#ifndef MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
+#define MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
\ No newline at end of file
diff --git a/mlir/lib/Quantum/IR/QuantumDialect.cpp b/mlir/lib/Quantum/IR/QuantumDialect.cpp
index 385f4e0ae5..04bfe34f2c 100644
--- a/mlir/lib/Quantum/IR/QuantumDialect.cpp
+++ b/mlir/lib/Quantum/IR/QuantumDialect.cpp
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/IR/DialectImplementation.h" // needed for generated type parser
 #include "llvm/ADT/TypeSwitch.h"           // needed for generated type parser
 
@@ -43,6 +44,8 @@ void QuantumDialect::initialize()
 #define GET_OP_LIST
 #include "Quantum/IR/QuantumOps.cpp.inc"
         >();
+
+    declarePromisedInterfaces();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
new file mode 100644
index 0000000000..a92aa1da79
--- /dev/null
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -0,0 +1,11 @@
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+
+#include "Quantum/IR/QuantumOps.h"
+#include "Quantum/Transforms/BufferizableOpInterfaceImpl.h"
+
+using namespace mlir;
+using namespace catalyst::quantum;
+
+namespace {
+
+}
\ No newline at end of file
diff --git a/mlir/lib/Quantum/Transforms/CMakeLists.txt b/mlir/lib/Quantum/Transforms/CMakeLists.txt
index 51118a3878..22489d497b 100644
--- a/mlir/lib/Quantum/Transforms/CMakeLists.txt
+++ b/mlir/lib/Quantum/Transforms/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LIBRARY_NAME quantum-transforms)
 
 file(GLOB SRC
+    BufferizableOpInterfaceImpl.cpp
     BufferizationPatterns.cpp
     quantum_bufferize.cpp
     ConversionPatterns.cpp
From 3febbf7a8e2cee3524ce076cdd90e5ce8dd30855 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 21 Aug 2024 17:53:28 -0400
Subject: [PATCH 017/183] Add empty one-shot-bufferize for quantum::ExtractOp

---
 .../Transforms/BufferizableOpInterfaceImpl.h  | 19 +++++-----
 .../BufferizableOpInterfaceImpl.cpp           | 36 +++++++++++++++++++
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
index 069f4b3ee4..bf60013f70 100644
--- a/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
+++ b/mlir/include/Quantum/Transforms/BufferizableOpInterfaceImpl.h
@@ -1,12 +1,13 @@
-#ifndef MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
-#define MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
+#pragma once
 
-namespace mlir {
-class DialectRegistry;
+using namespace mlir;
 
-namespace tensor {
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-} // namespace tensor
-} // namespace mlir
+namespace catalyst {
 
-#endif // MLIR_DIALECT_QUANTUM_BUFFERIZABLEOPINTERFACEIMPL_H
\ No newline at end of file
+namespace quantum {
+
+void registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry &registry);
+
+}
+
+} // namespace catalyst
diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index a92aa1da79..f3dbb521e2 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,3 +1,4 @@
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 
 #include "Quantum/IR/QuantumOps.h"
@@ -8,4 +9,39 @@ using namespace catalyst::quantum;
 
 namespace {
 
+/// Bufferization of tensor.extract. Replace with memref.load.
+struct ExtractOpInterface
+    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<ExtractOpInterface,
+                                                                         catalyst::quantum::ExtractOp> {
+    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                const mlir::bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                 const mlir::bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
+                                                             mlir::OpOperand &opOperand,
+                                                             const mlir::bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
+                            const mlir::bufferization::BufferizationOptions &options) const {
+        auto extractOp = cast<ExtractOp>(op);
+
+        return success();
+    }
+};
+
+}
+
+void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+    registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
+        ExtractOp::attachInterface<ExtractOpInterface>(*ctx);
+    });
+}
\ No newline at end of file

From a354e3fe8efffbfad599d714a5990418f7bbf966 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Mon, 26 Aug 2024 13:46:19 -0400
Subject: [PATCH 018/183] Temporarily get rid of memory error

---
 frontend/catalyst/compiler.py         | 6 +++---
 mlir/include/Quantum/IR/QuantumOps.td | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 862e202c0e..6a1bf8242c 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -197,9 +197,9 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "one-shot-bufferize{bufferize-function-boundaries}",
         "func.func(buffer-hoisting)",
         "func.func(buffer-loop-hoisting)",
-        "buffer-results-to-out-params",
-        "drop-equivalent-buffer-results",
-        "func.func(promote-buffers-to-stack)",
+        #"buffer-results-to-out-params",
+        #"drop-equivalent-buffer-results",
+        #"func.func(promote-buffers-to-stack)",
         #"buffer-deallocation-pipeline",
     ],
 )
diff --git a/mlir/include/Quantum/IR/QuantumOps.td b/mlir/include/Quantum/IR/QuantumOps.td
index 10c4cd50f6..dd81b8f52d 100644
--- a/mlir/include/Quantum/IR/QuantumOps.td
+++ b/mlir/include/Quantum/IR/QuantumOps.td
@@ -63,7 +63,7 @@ def NamedObservableAttr : EnumAttr<QuantumDialect, NamedObservable, "named_obser
 class Quantum_Op<string mnemonic, list<Trait> traits = []> : Op<QuantumDialect, mnemonic, traits>;
 
-def InitializeOp : Quantum_Op<"init", [NoMemoryEffect]> {
+def InitializeOp : Quantum_Op<"init"> {
     let summary = "Initialize the quantum runtime.";
 
     let assemblyFormat = [{
@@ -71,7 +71,7 @@ def InitializeOp : Quantum_Op<"init"> {
     }];
 }
 
-def FinalizeOp : Quantum_Op<"finalize", [NoMemoryEffect]> {
+def FinalizeOp : Quantum_Op<"finalize"> {
     let summary = "Teardown the quantum runtime.";
 
     let assemblyFormat = [{
From 7de0d80668e18a13641837c283831aadc91f6514 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Mon, 26 Aug 2024 14:03:39 -0400
Subject: [PATCH 019/183] Register new one-shot bufferization pass

---
 mlir/lib/Driver/CompilerDriver.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp
index af19b09038..99a03f2347 100644
--- a/mlir/lib/Driver/CompilerDriver.cpp
+++ b/mlir/lib/Driver/CompilerDriver.cpp
@@ -64,6 +64,7 @@
 #include "Mitigation/Transforms/Passes.h"
 #include "Quantum/IR/QuantumDialect.h"
 #include "Quantum/Transforms/Passes.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 
 #include "Enzyme.h"
 #include "Timer.hpp"
@@ -71,6 +72,7 @@
 using namespace mlir;
 using namespace catalyst;
 using namespace catalyst::driver;
+using namespace catalyst::quantum;
 
 namespace catalyst::utils {
 
@@ -300,6 +302,9 @@ void registerAllCatalystDialects(DialectRegistry &registry)
     registry.insert();
     registry.insert();
     registry.insert();
+
+    // Extend one-shot bufferization pass.
+    catalyst::quantum::registerBufferizableOpInterfaceExternalModels(registry);
 }
 
 } // namespace

From af6f65a401d1f8948013f49c7b54ef5f6df47edc Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Mon, 26 Aug 2024 14:22:23 -0400
Subject: [PATCH 020/183] Fix include typos

---
 mlir/lib/Driver/CompilerDriver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp
index 99a03f2347..07d92c6a58 100644
--- a/mlir/lib/Driver/CompilerDriver.cpp
+++ b/mlir/lib/Driver/CompilerDriver.cpp
@@ -64,7 +64,7 @@
 #include "Mitigation/Transforms/Passes.h"
 #include "Quantum/IR/QuantumDialect.h"
 #include "Quantum/Transforms/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
+#include "Quantum/Transforms/BufferizableOpInterfaceImpl.h"
 
 #include "Enzyme.h"
 #include "Timer.hpp"
From 1edbcd2bffb97832e17b77225e59b002e4b4b4a9 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Mon, 26 Aug 2024 14:27:31 -0400
Subject: [PATCH 021/183] Replace ExtractOP with StateOp for the first
 bufferization example

---
 .../Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index f3dbb521e2..11f3f614e9 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -10,9 +10,9 @@ using namespace catalyst::quantum;
 namespace {
 
 /// Bufferization of tensor.extract. Replace with memref.load.
-struct ExtractOpInterface
-    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<ExtractOpInterface,
-                                                                         catalyst::quantum::ExtractOp> {
+struct StateOpInterface
+    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<StateOpInterface,
+                                                                         catalyst::quantum::StateOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
         return true;
@@ -31,7 +31,7 @@ struct ExtractOpInterface
 
     LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
                             const mlir::bufferization::BufferizationOptions &options) const {
-        auto extractOp = cast<ExtractOp>(op);
+        auto StateOp = cast<StateOp>(op);
 
         return success();
     }
@@ -42,6 +42,6 @@ struct ExtractOpInterface
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
     DialectRegistry &registry) {
     registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
-        ExtractOp::attachInterface<ExtractOpInterface>(*ctx);
+        StateOp::attachInterface<StateOpInterface>(*ctx);
     });
 }
\ No newline at end of file
From f94dcf785dbc15b5247b0a5fc6a203ff55af7d81 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Tue, 27 Aug 2024 15:39:32 -0400
Subject: [PATCH 022/183] Test StateOp bufferization

---
 frontend/catalyst/compiler.py                 |  4 ++--
 .../BufferizableOpInterfaceImpl.cpp           | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 6a1bf8242c..85d3172db6 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -195,8 +195,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
     [
         "eliminate-empty-tensors",
         "one-shot-bufferize{bufferize-function-boundaries}",
-        "func.func(buffer-hoisting)",
-        "func.func(buffer-loop-hoisting)",
+        #"func.func(buffer-hoisting)",
+        #"func.func(buffer-loop-hoisting)",
         #"buffer-results-to-out-params",
         #"drop-equivalent-buffer-results",
         #"func.func(promote-buffers-to-stack)",
diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index 11f3f614e9..457bd05613 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,5 +1,6 @@
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Transforms/DialectConversion.h"
 
 #include "Quantum/IR/QuantumOps.h"
 #include "Quantum/Transforms/BufferizableOpInterfaceImpl.h"
@@ -15,7 +16,7 @@ struct StateOpInterface
                                  catalyst::quantum::StateOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
-        return true;
+        return false;
     }
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
@@ -31,7 +32,19 @@ struct StateOpInterface
 
     LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
                             const mlir::bufferization::BufferizationOptions &options) const {
-        auto StateOp = cast<StateOp>(op);
+        auto stateOp = cast<StateOp>(op);
+        Location loc = op->getLoc();
+        FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
+            rewriter, loc, stateOp.getState(), options,
+            /*copy=*/false);
+        if (failed(tensorAlloc))
+            return failure();
+        llvm::outs() << "This rewrite happens!\n";
+        auto tensorType = cast<RankedTensorType>(tensorAlloc->getType());
+        MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+        //rewriter.create<memref::AllocOp>(loc, resultType);
+        Value allocVal = rewriter.replaceOpWithNewOp<memref::AllocOp>(stateOp, resultType);
+        rewriter.create<StateOp>(loc, cast<MemRefType>(allocVal.getType()));
 
         return success();
     }

From a3816a6bce42565a53dd577734d28b02366d8717 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Tue, 27 Aug 2024 19:26:29 -0400
Subject: [PATCH 023/183] Use replaceOpWithBufferizedValues to get rid of
 segmentation fault

---
 .../BufferizableOpInterfaceImpl.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index 457bd05613..b8dfdc3451 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -16,12 +16,12 @@ struct StateOpInterface
                                  catalyst::quantum::StateOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
-        return false;
+        return true;
     }
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
                                  const mlir::bufferization::AnalysisState &state) const {
-        return false;
+        return true;
     }
 
     mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
@@ -34,17 +34,12 @@ struct StateOpInterface
     LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
                             const mlir::bufferization::BufferizationOptions &options) const {
         auto stateOp = cast<StateOp>(op);
         Location loc = op->getLoc();
-        FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
-            rewriter, loc, stateOp.getState(), options,
-            /*copy=*/false);
-        if (failed(tensorAlloc))
-            return failure();
-        llvm::outs() << "This rewrite happens!\n";
-        auto tensorType = cast<RankedTensorType>(tensorAlloc->getType());
+        auto tensorType = cast<RankedTensorType>(stateOp.getState().getType());
         MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
-        //rewriter.create<memref::AllocOp>(loc, resultType);
-        Value allocVal = rewriter.replaceOpWithNewOp<memref::AllocOp>(stateOp, resultType);
-        rewriter.create<StateOp>(loc, cast<MemRefType>(allocVal.getType()));
+
+        Value allocVal = rewriter.create<memref::AllocOp>(loc, resultType);
+        rewriter.create<StateOp>(loc, TypeRange{}, ValueRange{stateOp.getObs(), allocVal});
+        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
 
         return success();
     }

From fc46399b8134e12020eecaabdc81b95194a7addb2 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 11:21:51 -0400
Subject: [PATCH 024/183] Set buffer write for StateOp as false

---
 mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index b8dfdc3451..a3fd82fab3 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -21,7 +21,7 @@ struct StateOpInterface
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
                                  const mlir::bufferization::AnalysisState &state) const {
-        return true;
+        return false;
     }
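Note on PATCH 023 above: the segfault disappears because, during one-shot bufferization, the op being rewritten may still have tensor-typed users, so replacing it directly with a memref.alloc (as PATCH 022 did) leaves type-inconsistent IR. bufferization::replaceOpWithBufferizedValues instead wraps each buffer in a bufferization.to_tensor op before substituting it, and the driver folds those away as bufferization completes. A hedged sketch of the idiom (the helper name is illustrative, not Catalyst's API):

    #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
    #include "mlir/Dialect/MemRef/IR/MemRef.h"

    using namespace mlir;

    // Replace a one-result tensor op with a freshly allocated buffer; the
    // inserted to_tensor wrapper keeps remaining tensor users type-correct.
    void replaceWithFreshBuffer(RewriterBase &rewriter, Operation *op, MemRefType type) {
        Value buffer = rewriter.create<memref::AllocOp>(op->getLoc(), type);
        bufferization::replaceOpWithBufferizedValues(rewriter, op, buffer);
    }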
From f02870939bed617fc7b33edd405222d53c982e9f Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 11:47:02 -0400
Subject: [PATCH 025/183] Add new bufferization pass for ProbsOp

---
 .../BufferizableOpInterfaceImpl.cpp | 42 ++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index a3fd82fab3..cc199c7c7d 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -10,7 +10,8 @@ using namespace catalyst::quantum;
 
 namespace {
 
-/// Bufferization of tensor.extract. Replace with memref.load.
+/// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new
+/// catalyst.quantum.state that uses the memory allocated by memref.alloc.
 struct StateOpInterface
     : public mlir::bufferization::BufferizableOpInterface::ExternalModel<StateOpInterface,
                                                                          catalyst::quantum::StateOp> {
@@ -45,11 +46,48 @@ struct StateOpInterface
     }
 };
 
+/// Bufferization of catalyst.quantum.probs. Replace with memref.alloc and a new
+/// catalyst.quantum.probs that uses the memory allocated by memref.alloc.
+struct ProbsOpInterface
+    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<ProbsOpInterface,
+                                                                         catalyst::quantum::ProbsOp> {
+    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                const mlir::bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                 const mlir::bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
+                                                             mlir::OpOperand &opOperand,
+                                                             const mlir::bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
+                            const mlir::bufferization::BufferizationOptions &options) const {
+        auto probsOp = cast<ProbsOp>(op);
+        Location loc = op->getLoc();
+        auto tensorType = cast<RankedTensorType>(probsOp.getProbabilities().getType());
+        MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+
+        Value allocVal = rewriter.create<memref::AllocOp>(loc, resultType);
+        rewriter.create<ProbsOp>(loc, TypeRange{}, ValueRange{probsOp.getObs(), allocVal});
+        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
+
+        return success();
+    }
+};
+
+} // namespace
 
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
     DialectRegistry &registry) {
     registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
         StateOp::attachInterface<StateOpInterface>(*ctx);
+        ProbsOp::attachInterface<ProbsOpInterface>(*ctx);
     });
 }
\ No newline at end of file
From dc631a7489a6e3268f93a1780baa9aeac7d2ed6c Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 12:20:51 -0400
Subject: [PATCH 026/183] Add new bufferization interface for CountsOp

---
 .../BufferizableOpInterfaceImpl.cpp | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index cc199c7c7d..c2607e84fd 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -82,6 +82,46 @@ struct ProbsOpInterface
     }
 };
 
+/// Bufferization of catalyst.quantum.counts. Replace with memref.alloc and a new
+/// catalyst.quantum.counts that uses the memory allocated by memref.alloc.
+struct CountsOpInterface
+    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<CountsOpInterface,
+                                                                         catalyst::quantum::CountsOp> {
+    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                const mlir::bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
+                                 const mlir::bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
+                                                             mlir::OpOperand &opOperand,
+                                                             const mlir::bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
+                            const mlir::bufferization::BufferizationOptions &options) const {
+        auto countsOp = cast<CountsOp>(op);
+        Location loc = op->getLoc();
+        auto tensorType0 = cast<RankedTensorType>(countsOp.getEigvals().getType());
+        auto tensorType1 = cast<RankedTensorType>(countsOp.getCounts().getType());
+        MemRefType resultType0 = MemRefType::get(tensorType0.getShape(), tensorType0.getElementType());
+        MemRefType resultType1 = MemRefType::get(tensorType1.getShape(), tensorType1.getElementType());
+
+        Value allocVal0 = rewriter.create<memref::AllocOp>(loc, resultType0);
+        Value allocVal1 = rewriter.create<memref::AllocOp>(loc, resultType1);
+        rewriter.create<CountsOp>(loc, nullptr, nullptr, countsOp.getObs(), allocVal0, allocVal1,
+                                  countsOp.getShotsAttr());
+        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1});
+
+        return success();
+    }
+};
+
 } // namespace
 
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
@@ -89,5 +129,6 @@ void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
     registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
         StateOp::attachInterface<StateOpInterface>(*ctx);
         ProbsOp::attachInterface<ProbsOpInterface>(*ctx);
+        CountsOp::attachInterface<CountsOpInterface>(*ctx);
     });
 }
\ No newline at end of file

From b8181b8134e12020eecaabdc81b95194a7addb24 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 14:14:57 -0400
Subject: [PATCH 027/183] Correct the read flag for StateOp like operations

---
 mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index c2607e84fd..c42a973d4c 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -17,7 +17,7 @@ struct StateOpInterface
                                  catalyst::quantum::StateOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
-        return true;
+        return false;
     }
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
@@ -53,7 +53,7 @@ struct ProbsOpInterface
                                  catalyst::quantum::ProbsOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
-        return true;
+        return false;
    }
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
@@ -89,7 +89,7 @@ struct CountsOpInterface
                                  catalyst::quantum::CountsOp> {
     bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
-        return true;
+        return false;
     }
 
     bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
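Note on PATCH 027: in one-shot bufferization, bufferizesToMemoryRead/bufferizesToMemoryWrite state, per OpOperand, whether the op will read or write the buffer that operand eventually bufferizes to; the analysis uses these answers to decide where copies are required. For measurement-style ops that only consume an observable handle and fill a buffer they allocate themselves, both can be false. A commented sketch of the contract (illustrative, not the final Catalyst code):

    // The observable operand is a handle, not tensor data read from a buffer.
    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
                                const mlir::bufferization::AnalysisState &state) const {
        return false;
    }

    // The op writes only into the buffer it allocates itself, never through an
    // operand, so no conflict-driven copies are needed.
    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
                                 const mlir::bufferization::AnalysisState &state) const {
        return false;
    }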
From 849e884bd363f37029e345dcf246d77db3e166d1 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 15:52:35 -0400
Subject: [PATCH 028/183] Add new set_state bufferization

---
 .../BufferizableOpInterfaceImpl.cpp | 110 ++++++++++++------
 1 file changed, 75 insertions(+), 35 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index c42a973d4c..90dc64e523 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,4 +1,5 @@
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Transforms/DialectConversion.h"
 
@@ -13,26 +14,26 @@ namespace {
 /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new
 /// catalyst.quantum.state that uses the memory allocated by memref.alloc.
 struct StateOpInterface
-    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<StateOpInterface,
-                                                                         catalyst::quantum::StateOp> {
-    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                const mlir::bufferization::AnalysisState &state) const {
+    : public bufferization::BufferizableOpInterface::ExternalModel<StateOpInterface,
+                                                                   catalyst::quantum::StateOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
        return false;
     }
 
-    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                 const mlir::bufferization::AnalysisState &state) const {
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
         return false;
     }
 
-    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
-                                                             mlir::OpOperand &opOperand,
-                                                             const mlir::bufferization::AnalysisState &state) const {
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
         return {};
     }
 
-    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
-                            const mlir::bufferization::BufferizationOptions &options) const {
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
         auto stateOp = cast<StateOp>(op);
         Location loc = op->getLoc();
         auto tensorType = cast<RankedTensorType>(stateOp.getState().getType());
@@ -41,7 +41,7 @@ struct StateOpInterface
 
         Value allocVal = rewriter.create<memref::AllocOp>(loc, resultType);
         rewriter.create<StateOp>(loc, TypeRange{}, ValueRange{stateOp.getObs(), allocVal});
-        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
 
         return success();
     }
@@ -50,26 +50,26 @@ struct StateOpInterface
 /// Bufferization of catalyst.quantum.probs. Replace with memref.alloc and a new
 /// catalyst.quantum.probs that uses the memory allocated by memref.alloc.
 struct ProbsOpInterface
-    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<ProbsOpInterface,
-                                                                         catalyst::quantum::ProbsOp> {
-    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                const mlir::bufferization::AnalysisState &state) const {
+    : public bufferization::BufferizableOpInterface::ExternalModel<ProbsOpInterface,
+                                                                   catalyst::quantum::ProbsOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
         return false;
     }
 
-    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                 const mlir::bufferization::AnalysisState &state) const {
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
         return false;
     }
 
-    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
-                                                             mlir::OpOperand &opOperand,
-                                                             const mlir::bufferization::AnalysisState &state) const {
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
         return {};
     }
 
-    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
-                            const mlir::bufferization::BufferizationOptions &options) const {
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
         auto probsOp = cast<ProbsOp>(op);
         Location loc = op->getLoc();
         auto tensorType = cast<RankedTensorType>(probsOp.getProbabilities().getType());
@@ -77,35 +77,35 @@ struct ProbsOpInterface
 
         Value allocVal = rewriter.create<memref::AllocOp>(loc, resultType);
         rewriter.create<ProbsOp>(loc, TypeRange{}, ValueRange{probsOp.getObs(), allocVal});
-        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal);
 
         return success();
     }
 };
 
-/// Bufferization of catalyst.quantum.counts. Replace with memref.alloc and a new
-/// catalyst.quantum.counts that uses the memory allocated by memref.alloc.
+/// Bufferization of catalyst.quantum.counts. Replace with memref.allocs and a new
+/// catalyst.quantum.counts that uses the memory allocated by memref.allocs.
 struct CountsOpInterface
-    : public mlir::bufferization::BufferizableOpInterface::ExternalModel<CountsOpInterface,
-                                                                         catalyst::quantum::CountsOp> {
-    bool bufferizesToMemoryRead(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                const mlir::bufferization::AnalysisState &state) const {
+    : public bufferization::BufferizableOpInterface::ExternalModel<CountsOpInterface,
+                                                                   catalyst::quantum::CountsOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
         return false;
     }
 
-    bool bufferizesToMemoryWrite(mlir::Operation *op, mlir::OpOperand &opOperand,
-                                 const mlir::bufferization::AnalysisState &state) const {
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
         return false;
     }
 
-    mlir::bufferization::AliasingValueList getAliasingValues(mlir::Operation *op,
-                                                             mlir::OpOperand &opOperand,
-                                                             const mlir::bufferization::AnalysisState &state) const {
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
         return {};
     }
 
-    LogicalResult bufferize(mlir::Operation *op, RewriterBase &rewriter,
-                            const mlir::bufferization::BufferizationOptions &options) const {
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
         auto countsOp = cast<CountsOp>(op);
         Location loc = op->getLoc();
         auto tensorType0 = cast<RankedTensorType>(countsOp.getEigvals().getType());
         auto tensorType1 = cast<RankedTensorType>(countsOp.getCounts().getType());
         MemRefType resultType0 = MemRefType::get(tensorType0.getShape(), tensorType0.getElementType());
         MemRefType resultType1 = MemRefType::get(tensorType1.getShape(), tensorType1.getElementType());
 
         Value allocVal0 = rewriter.create<memref::AllocOp>(loc, resultType0);
         Value allocVal1 = rewriter.create<memref::AllocOp>(loc, resultType1);
         rewriter.create<CountsOp>(loc, nullptr, nullptr, countsOp.getObs(), allocVal0, allocVal1,
                                   countsOp.getShotsAttr());
-        mlir::bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1});
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1});
 
         return success();
     }
 };
 
+/// Bufferization of catalyst.quantum.set_state. Replace with bufferization::ToMemrefOp and
+/// a new catalyst.quantum.set_state that uses the memref from bufferization::ToMemrefOp.
+struct SetStateOpInterface
+    : public bufferization::BufferizableOpInterface::ExternalModel<SetStateOpInterface,
+                                                                   catalyst::quantum::SetStateOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
+        auto setStateOp = cast<SetStateOp>(op);
+        Location loc = op->getLoc();
+        auto tensorType = cast<RankedTensorType>(setStateOp.getInState().getType());
+        MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+
+        auto toMemrefOp = rewriter.create<bufferization::ToMemrefOp>(loc, memrefType,
+                                                                     setStateOp.getInState());
+        auto memref = toMemrefOp.getResult();
+        auto newSetStateOp = rewriter.create<SetStateOp>(loc, setStateOp.getOutQubits().getTypes(),
+                                                         memref, setStateOp.getInQubits());
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits());
+        return success();
+    }
+};
+
 } // namespace
 
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
     DialectRegistry &registry) {
     registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
         StateOp::attachInterface<StateOpInterface>(*ctx);
         ProbsOp::attachInterface<ProbsOpInterface>(*ctx);
         CountsOp::attachInterface<CountsOpInterface>(*ctx);
+        SetStateOp::attachInterface<SetStateOpInterface>(*ctx);
     });
 }
\ No newline at end of file
From 39f175714a49f6931654279fa3005b98c22f8fd6 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 16:23:54 -0400
Subject: [PATCH 029/183] Add SetBasisStateOp bufferization

---
 .../BufferizableOpInterfaceImpl.cpp | 39 +++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index 90dc64e523..ccfbdc1c9d 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -161,6 +161,44 @@ struct SetStateOpInterface
     }
 };
 
+/// Bufferization of catalyst.quantum.set_basic_state. Replace with bufferization::ToMemrefOp and
+/// a new catalyst.quantum.set_basic_state that uses the memref from bufferization::ToMemrefOp.
+struct SetBasisStateOpInterface
+    : public bufferization::BufferizableOpInterface::ExternalModel<SetBasisStateOpInterface,
+                                                                   catalyst::quantum::SetBasisStateOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
+        auto setBasisStateOp = cast<SetBasisStateOp>(op);
+        Location loc = op->getLoc();
+        auto tensorType = cast<RankedTensorType>(setBasisStateOp.getBasisState().getType());
+        MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+
+        auto toMemrefOp = rewriter.create<bufferization::ToMemrefOp>(loc, memrefType,
+                                                                     setBasisStateOp.getBasisState());
+        auto memref = toMemrefOp.getResult();
+        auto newSetStateOp = rewriter.create<SetBasisStateOp>(loc, setBasisStateOp.getOutQubits().getTypes(),
+                                                              memref, setBasisStateOp.getInQubits());
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits());
+        return success();
+    }
+};
+
 } // namespace
 
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
@@ -170,5 +208,6 @@ void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
         ProbsOp::attachInterface<ProbsOpInterface>(*ctx);
         CountsOp::attachInterface<CountsOpInterface>(*ctx);
         SetStateOp::attachInterface<SetStateOpInterface>(*ctx);
+        SetBasisStateOp::attachInterface<SetBasisStateOpInterface>(*ctx);
     });
 }
\ No newline at end of file
From b5157fff5846702b08d9ddd458d9c8278acc5d29 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 28 Aug 2024 17:17:07 -0400
Subject: [PATCH 030/183] Add QubitUnitary Bufferization

---
 .../BufferizableOpInterfaceImpl.cpp | 47 +++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index ccfbdc1c9d..9b9885d897 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -11,6 +11,46 @@ using namespace catalyst::quantum;
 
 namespace {
 
+/// Bufferization of catalyst.quantum.state. Convert Matrix into memref.
+struct QubitUnitaryOpInterface
+    : public bufferization::BufferizableOpInterface::ExternalModel<QubitUnitaryOpInterface,
+                                                                   catalyst::quantum::QubitUnitaryOp> {
+    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                                const bufferization::AnalysisState &state) const {
+        return true;
+    }
+
+    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                                 const bufferization::AnalysisState &state) const {
+        return false;
+    }
+
+    bufferization::AliasingValueList getAliasingValues(Operation *op,
+                                                       OpOperand &opOperand,
+                                                       const bufferization::AnalysisState &state) const {
+        return {};
+    }
+
+    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                            const bufferization::BufferizationOptions &options) const {
+        auto qubitUnitaryOp = cast<QubitUnitaryOp>(op);
+        Location loc = op->getLoc();
+        auto tensorType = cast<RankedTensorType>(qubitUnitaryOp.getMatrix().getType());
+        MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+        auto toMemrefOp = rewriter.create<bufferization::ToMemrefOp>(loc, memrefType,
+                                                                     qubitUnitaryOp.getMatrix());
+        auto memref = toMemrefOp.getResult();
+        auto newQubitUnitaryOp = rewriter.create<QubitUnitaryOp>(
+            loc, qubitUnitaryOp.getOutQubits().getTypes(),
+            qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref,
+            qubitUnitaryOp.getInQubits(), qubitUnitaryOp.getAdjointAttr(),
+            qubitUnitaryOp.getInCtrlQubits(), qubitUnitaryOp.getInCtrlValues());
+        bufferization::replaceOpWithBufferizedValues(rewriter, op, newQubitUnitaryOp.getOutQubits());
+
+        return success();
+    }
+};
+
 /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new
 /// catalyst.quantum.state that uses the memory allocated by memref.alloc.
 struct StateOpInterface
@@ -163,8 +203,7 @@ struct CountsOpInterface
     }
 };
 
-/// Bufferization of catalyst.quantum.set_state. Replace with bufferization::ToMemrefOp and
-/// a new catalyst.quantum.set_state that uses the memref from bufferization::ToMemrefOp.
+/// Bufferization of catalyst.quantum.set_state. Convert InState into memref.
 struct SetStateOpInterface
     : public bufferization::BufferizableOpInterface::ExternalModel<SetStateOpInterface,
                                                                    catalyst::quantum::SetStateOp> {
@@ -201,8 +240,7 @@ struct SetStateOpInterface
     }
 };
 
-/// Bufferization of catalyst.quantum.set_basic_state. Replace with bufferization::ToMemrefOp and
-/// a new catalyst.quantum.set_basic_state that uses the memref from bufferization::ToMemrefOp.
+/// Bufferization of catalyst.quantum.set_basic_state. Convert BasisState into memref.
 struct SetBasisStateOpInterface
     : public bufferization::BufferizableOpInterface::ExternalModel<SetBasisStateOpInterface,
                                                                    catalyst::quantum::SetBasisStateOp> {
@@ -244,6 +282,7 @@ struct SetBasisStateOpInterface
 void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(
     DialectRegistry &registry) {
     registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) {
+        QubitUnitaryOp::attachInterface<QubitUnitaryOpInterface>(*ctx);
         StateOp::attachInterface<StateOpInterface>(*ctx);
         ProbsOp::attachInterface<ProbsOpInterface>(*ctx);
         CountsOp::attachInterface<CountsOpInterface>(*ctx);
Convert Matrix into memref. +/// Bufferization of catalyst.quantum.unitary. Convert Matrix into memref. struct QubitUnitaryOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { @@ -51,6 +51,43 @@ struct QubitUnitaryOpInterface } }; +/// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref. +struct HermitianOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto hermitianOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(hermitianOp.getMatrix().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto toMemrefOp = rewriter.create(loc, memrefType, + hermitianOp.getMatrix()); + auto memref = toMemrefOp.getResult(); + auto newHermitianOp = rewriter.create(loc, hermitianOp.getType(), memref, + hermitianOp.getQubits()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newHermitianOp.getObs()); + + return success(); + } +}; + /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new /// catalyst.quantum.state that uses the memory allocated by memref.alloc. struct StateOpInterface @@ -243,6 +280,7 @@ void catalyst::quantum::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) { QubitUnitaryOp::attachInterface(*ctx); + HermitianOp::attachInterface(*ctx); StateOp::attachInterface(*ctx); ProbsOp::attachInterface(*ctx); CountsOp::attachInterface(*ctx); From c3bf61abb15810edb208d784caa9edeb5bea123a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 28 Aug 2024 17:44:58 -0400 Subject: [PATCH 032/183] Add Hamiltonian bufferization --- .../BufferizableOpInterfaceImpl.cpp | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 50e0ff6497..a43544357a 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -88,6 +88,43 @@ struct HermitianOpInterface } }; +/// Bufferization of catalyst.quantum.hamiltonian. Convert Matrix into memref. 
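+/// (More precisely, it is the Coeffs tensor that gets bufferized.) A sketch
+/// with made-up operands and types:
+///
+///   %obs = quantum.hamiltonian(%coeffs : tensor<2xf64>) %o1, %o2 : !quantum.obs
+///
+/// becomes
+///
+///   %m = bufferization.to_memref %coeffs : memref<2xf64>
+///   %obs = quantum.hamiltonian(%m : memref<2xf64>) %o1, %o2 : !quantum.obs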
+struct HamiltonianOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto hamiltonianOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(hamiltonianOp.getCoeffs().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto toMemrefOp = rewriter.create(loc, memrefType, + hamiltonianOp.getCoeffs()); + auto memref = toMemrefOp.getResult(); + auto newHamiltonianOp = rewriter.create(loc, hamiltonianOp.getType(), memref, + hamiltonianOp.getTerms()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newHamiltonianOp.getObs()); + + return success(); + } +}; + /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new /// catalyst.quantum.state that uses the memory allocated by memref.alloc. struct StateOpInterface @@ -281,6 +318,7 @@ void catalyst::quantum::registerBufferizableOpInterfaceExternalModels( registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) { QubitUnitaryOp::attachInterface(*ctx); HermitianOp::attachInterface(*ctx); + HamiltonianOp::attachInterface(*ctx); StateOp::attachInterface(*ctx); ProbsOp::attachInterface(*ctx); CountsOp::attachInterface(*ctx); From a9ed44b82baa0c891c518b9107328699cd8513a8 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 28 Aug 2024 17:51:26 -0400 Subject: [PATCH 033/183] Remove redundant scope resolutions --- .../Transforms/BufferizableOpInterfaceImpl.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index a43544357a..76e073b9b2 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -14,7 +14,7 @@ namespace { /// Bufferization of catalyst.quantum.unitary. Convert Matrix into memref. struct QubitUnitaryOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + QubitUnitaryOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return true; @@ -54,7 +54,7 @@ struct QubitUnitaryOpInterface /// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref. struct HermitianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + HermitianOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return true; @@ -91,7 +91,7 @@ struct HermitianOpInterface /// Bufferization of catalyst.quantum.hamiltonian. Convert Matrix into memref. 
struct HamiltonianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + HamiltonianOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return true; @@ -129,7 +129,7 @@ struct HamiltonianOpInterface /// catalyst.quantum.state that uses the memory allocated by memref.alloc. struct StateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + StateOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return false; @@ -165,7 +165,7 @@ struct StateOpInterface /// catalyst.quantum.probs that uses the memory allocated by memref.alloc. struct ProbsOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + ProbsOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return false; @@ -201,7 +201,7 @@ struct ProbsOpInterface /// catalyst.quantum.counts that uses the memory allocated by memref.allocs. struct CountsOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + CountsOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return false; @@ -240,7 +240,7 @@ struct CountsOpInterface /// Bufferization of catalyst.quantum.set_state. Convert InState into memref. struct SetStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + SetStateOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return false; @@ -277,7 +277,7 @@ struct SetStateOpInterface /// Bufferization of catalyst.quantum.set_basic_state. Convert BasisState into memref. struct SetBasisStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + SetBasisStateOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return false; @@ -315,7 +315,7 @@ struct SetBasisStateOpInterface void catalyst::quantum::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, catalyst::quantum::QuantumDialect *dialect) { + registry.addExtension(+[](MLIRContext *ctx, QuantumDialect *dialect) { QubitUnitaryOp::attachInterface(*ctx); HermitianOp::attachInterface(*ctx); HamiltonianOp::attachInterface(*ctx); From b54c10437592a41279179bfff1caa02c082c1024 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 28 Aug 2024 18:01:00 -0400 Subject: [PATCH 034/183] Add SampleOp bufferization --- .../BufferizableOpInterfaceImpl.cpp | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 76e073b9b2..3e7b081b01 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -125,6 +125,43 @@ struct HamiltonianOpInterface } }; +/// Bufferization of catalyst.quantum.sample. Replace with memref.alloc and a new +/// catalyst.quantum.sample that uses the memory allocated by memref.alloc. 
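+/// Sketch (shapes illustrative; the exact printed form may differ): a
+/// value-semantics measurement such as
+///
+///   %samples = quantum.sample %obs : tensor<1000x2xf64>
+///
+/// becomes destination-passing style over a fresh allocation:
+///
+///   %alloc = memref.alloc() : memref<1000x2xf64>
+///   quantum.sample %obs in(%alloc : memref<1000x2xf64>)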
+struct SampleOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto sampleOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(sampleOp.getSamples().getType()); + MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + Value allocVal = rewriter.create(loc, resultType); + rewriter.create(loc, TypeRange{}, ValueRange{sampleOp.getObs(), allocVal}, + sampleOp->getAttrs()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); + + return success(); + } +}; + /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new /// catalyst.quantum.state that uses the memory allocated by memref.alloc. struct StateOpInterface @@ -319,6 +356,7 @@ void catalyst::quantum::registerBufferizableOpInterfaceExternalModels( QubitUnitaryOp::attachInterface(*ctx); HermitianOp::attachInterface(*ctx); HamiltonianOp::attachInterface(*ctx); + SampleOp::attachInterface(*ctx); StateOp::attachInterface(*ctx); ProbsOp::attachInterface(*ctx); CountsOp::attachInterface(*ctx); From 440b926f81c22ba56a92786b8e757670b0a94f70 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 10:35:57 -0400 Subject: [PATCH 035/183] Draft bufferization templates for Catalyst and Gradient IRs --- .../Transforms/BufferizableOpInterfaceImpl.h | 11 ++++++++++ .../Transforms/BufferizableOpInterfaceImpl.h | 13 ++++++++++++ .../BufferizableOpInterfaceImpl.cpp | 21 +++++++++++++++++++ mlir/lib/Catalyst/Transforms/CMakeLists.txt | 1 + mlir/lib/Driver/CompilerDriver.cpp | 6 +++++- .../BufferizableOpInterfaceImpl.cpp | 21 +++++++++++++++++++ mlir/lib/Gradient/Transforms/CMakeLists.txt | 1 + 7 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h create mode 100644 mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h create mode 100644 mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp create mode 100644 mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp diff --git a/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h new file mode 100644 index 0000000000..9c0e982cb0 --- /dev/null +++ b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h @@ -0,0 +1,11 @@ +#pragma once + +using namespace mlir; + +namespace catalyst { + + +void registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry ®istry); + + +} // namespace catalyst \ No newline at end of file diff --git a/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h new file mode 100644 index 0000000000..c8426be8b3 --- /dev/null +++ b/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h @@ -0,0 +1,13 @@ +#pragma once + +using namespace mlir; + +namespace catalyst { + +namespace gradient { + +void 
registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry ®istry); + +} + +} // namespace catalyst \ No newline at end of file diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp new file mode 100644 index 0000000000..a1ee1289ba --- /dev/null +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -0,0 +1,21 @@ +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "Catalyst/IR/CatalystOps.h" +#include "Catalyst/Transforms/BufferizableOpInterfaceImpl.h" + +using namespace mlir; +using namespace catalyst; + +namespace { + +} // namespace + +void catalyst::registerBufferizableOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { + + }); +} \ No newline at end of file diff --git a/mlir/lib/Catalyst/Transforms/CMakeLists.txt b/mlir/lib/Catalyst/Transforms/CMakeLists.txt index 71e8fc968c..e7113aa778 100644 --- a/mlir/lib/Catalyst/Transforms/CMakeLists.txt +++ b/mlir/lib/Catalyst/Transforms/CMakeLists.txt @@ -7,6 +7,7 @@ file(GLOB SRC qnode_to_async_lowering.cpp QnodeToAsyncPatterns.cpp RegisterAllPasses.cpp + BufferizableOpInterfaceImpl.cpp BufferizationPatterns.cpp catalyst_bufferize.cpp catalyst_to_llvm.cpp diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp index 07d92c6a58..67533ad2ef 100644 --- a/mlir/lib/Driver/CompilerDriver.cpp +++ b/mlir/lib/Driver/CompilerDriver.cpp @@ -53,18 +53,20 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "Catalyst/IR/CatalystDialect.h" +#include "Catalyst/Transforms/BufferizableOpInterfaceImpl.h" #include "Catalyst/Transforms/Passes.h" #include "Driver/CatalystLLVMTarget.h" #include "Driver/CompilerDriver.h" #include "Driver/Support.h" #include "Gradient/IR/GradientDialect.h" #include "Gradient/IR/GradientInterfaces.h" +#include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" #include "Gradient/Transforms/Passes.h" #include "Mitigation/IR/MitigationDialect.h" #include "Mitigation/Transforms/Passes.h" #include "Quantum/IR/QuantumDialect.h" -#include "Quantum/Transforms/Passes.h" #include "Quantum/Transforms/BufferizableOpInterfaceImpl.h" +#include "Quantum/Transforms/Passes.h" #include "Enzyme.h" #include "Timer.hpp" @@ -304,7 +306,9 @@ void registerAllCatalystDialects(DialectRegistry ®istry) registry.insert(); // Extend one-shot bufferization pass. 
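+    // External models attach the BufferizableOpInterface implementations to
+    // ops from outside the dialect definitions; registering them here is what
+    // makes the upstream one-shot bufferization pass aware of our custom ops.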
+ catalyst::registerBufferizableOpInterfaceExternalModels(registry); catalyst::quantum::registerBufferizableOpInterfaceExternalModels(registry); + catalyst::gradient::registerBufferizableOpInterfaceExternalModels(registry); } } // namespace diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp new file mode 100644 index 0000000000..dee62249f4 --- /dev/null +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -0,0 +1,21 @@ +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "Gradient/IR/GradientOps.h" +#include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" + +using namespace mlir; +using namespace catalyst::gradient; + +namespace { + +} // namespace + +void catalyst::gradient::registerBufferizableOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { + + }); +} \ No newline at end of file diff --git a/mlir/lib/Gradient/Transforms/CMakeLists.txt b/mlir/lib/Gradient/Transforms/CMakeLists.txt index 7c49addbec..82c00aa58a 100644 --- a/mlir/lib/Gradient/Transforms/CMakeLists.txt +++ b/mlir/lib/Gradient/Transforms/CMakeLists.txt @@ -2,6 +2,7 @@ set(LIBRARY_NAME gradient-transforms) file(GLOB SRC GradMethods/*.cpp + BufferizableOpInterfaceImpl.cpp BufferizationPatterns.cpp gradient_bufferize.cpp LoweringPatterns.cpp From 159057c863afc1bbd79752a3600ae605539ee0f7 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 11:35:15 -0400 Subject: [PATCH 036/183] Add printOp Bufferization --- .../BufferizableOpInterfaceImpl.cpp | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index a1ee1289ba..3302b1ea92 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -11,11 +11,45 @@ using namespace catalyst; namespace { +/// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref. 
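+/// (The model below is for catalyst.print: when the op carries a value
+/// operand, e.g. "catalyst.print %v" with %v a tensor, the op is recreated
+/// over the operand's buffer instead; prints without a value operand are
+/// left untouched.)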
+struct PrintOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto printOp = cast(op); + if (printOp.getVal()) { + FailureOr source = getBuffer(rewriter, printOp.getVal(), options); + if (failed(source)) + return failure(); + bufferization::replaceOpWithNewBufferizedOp(rewriter, op, *source, + printOp.getConstValAttr(), printOp.getPrintDescriptorAttr()); + } + return success(); + } +}; + } // namespace void catalyst::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { - + PrintOp::attachInterface(*ctx); }); } \ No newline at end of file From 81394aef9cdf5db3cedb2f4093f84da6c1973074 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 11:59:05 -0400 Subject: [PATCH 037/183] Fix indentation --- .../BufferizableOpInterfaceImpl.cpp | 572 +++++++++--------- 1 file changed, 286 insertions(+), 286 deletions(-) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 3e7b081b01..8e09a014eb 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -15,114 +15,114 @@ namespace { struct QubitUnitaryOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return true; - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } - bufferization::AliasingValueList getAliasingValues(Operation *op, + bufferization::AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto qubitUnitaryOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(qubitUnitaryOp.getMatrix().getType()); - MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - qubitUnitaryOp.getMatrix()); - auto memref = toMemrefOp.getResult(); - auto newQubitUnitaryOp = rewriter.create( - loc, qubitUnitaryOp.getOutQubits().getTypes(), - qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, - qubitUnitaryOp.getInQubits(), qubitUnitaryOp.getAdjointAttr(), - qubitUnitaryOp.getInCtrlQubits(), qubitUnitaryOp.getInCtrlValues()); - 
bufferization::replaceOpWithBufferizedValues(rewriter, op, newQubitUnitaryOp.getOutQubits()); - - return success(); - } + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto qubitUnitaryOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(qubitUnitaryOp.getMatrix().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto toMemrefOp = rewriter.create(loc, memrefType, + qubitUnitaryOp.getMatrix()); + auto memref = toMemrefOp.getResult(); + auto newQubitUnitaryOp = rewriter.create( + loc, qubitUnitaryOp.getOutQubits().getTypes(), + qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, + qubitUnitaryOp.getInQubits(), qubitUnitaryOp.getAdjointAttr(), + qubitUnitaryOp.getInCtrlQubits(), qubitUnitaryOp.getInCtrlValues()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newQubitUnitaryOp.getOutQubits()); + + return success(); + } }; /// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref. struct HermitianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return true; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto hermitianOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(hermitianOp.getMatrix().getType()); - MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - hermitianOp.getMatrix()); - auto memref = toMemrefOp.getResult(); - auto newHermitianOp = rewriter.create(loc, hermitianOp.getType(), memref, - hermitianOp.getQubits()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, newHermitianOp.getObs()); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto hermitianOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(hermitianOp.getMatrix().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto toMemrefOp = rewriter.create(loc, memrefType, + hermitianOp.getMatrix()); + auto memref = toMemrefOp.getResult(); + auto newHermitianOp = rewriter.create(loc, hermitianOp.getType(), memref, + hermitianOp.getQubits()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newHermitianOp.getObs()); + + return success(); + } }; /// Bufferization of catalyst.quantum.hamiltonian. Convert Matrix into memref. 
struct HamiltonianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return true; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto hamiltonianOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(hamiltonianOp.getCoeffs().getType()); - MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - hamiltonianOp.getCoeffs()); - auto memref = toMemrefOp.getResult(); - auto newHamiltonianOp = rewriter.create(loc, hamiltonianOp.getType(), memref, - hamiltonianOp.getTerms()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, newHamiltonianOp.getObs()); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto hamiltonianOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(hamiltonianOp.getCoeffs().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto toMemrefOp = rewriter.create(loc, memrefType, + hamiltonianOp.getCoeffs()); + auto memref = toMemrefOp.getResult(); + auto newHamiltonianOp = rewriter.create(loc, hamiltonianOp.getType(), memref, + hamiltonianOp.getTerms()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newHamiltonianOp.getObs()); + + return success(); + } }; /// Bufferization of catalyst.quantum.sample. 
Replace with memref.alloc and a new @@ -130,36 +130,36 @@ struct HamiltonianOpInterface struct SampleOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto sampleOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(sampleOp.getSamples().getType()); - MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - - Value allocVal = rewriter.create(loc, resultType); - rewriter.create(loc, TypeRange{}, ValueRange{sampleOp.getObs(), allocVal}, - sampleOp->getAttrs()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto sampleOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(sampleOp.getSamples().getType()); + MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + Value allocVal = rewriter.create(loc, resultType); + rewriter.create(loc, TypeRange{}, ValueRange{sampleOp.getObs(), allocVal}, + sampleOp->getAttrs()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); + + return success(); + } }; /// Bufferization of catalyst.quantum.state. 
Replace with memref.alloc and a new @@ -167,35 +167,35 @@ struct SampleOpInterface struct StateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto stateOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(stateOp.getState().getType()); - MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - - Value allocVal = rewriter.create(loc, resultType); - rewriter.create(loc, TypeRange{}, ValueRange{stateOp.getObs(), allocVal}); - bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto stateOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(stateOp.getState().getType()); + MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + Value allocVal = rewriter.create(loc, resultType); + rewriter.create(loc, TypeRange{}, ValueRange{stateOp.getObs(), allocVal}); + bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); + + return success(); + } }; /// Bufferization of catalyst.quantum.probs. 
Replace with memref.alloc and a new @@ -203,35 +203,35 @@ struct StateOpInterface struct ProbsOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto probsOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(probsOp.getProbabilities().getType()); - MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - - Value allocVal = rewriter.create(loc, resultType); - rewriter.create(loc, TypeRange{}, ValueRange{probsOp.getObs(), allocVal}); - bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto probsOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(probsOp.getProbabilities().getType()); + MemRefType resultType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + Value allocVal = rewriter.create(loc, resultType); + rewriter.create(loc, TypeRange{}, ValueRange{probsOp.getObs(), allocVal}); + bufferization::replaceOpWithBufferizedValues(rewriter, op, allocVal); + + return success(); + } }; /// Bufferization of catalyst.quantum.counts. 
Replace with memref.allocs and a new @@ -239,128 +239,128 @@ struct ProbsOpInterface struct CountsOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto countsOp = cast(op); - Location loc = op->getLoc(); - auto tensorType0 = cast(countsOp.getEigvals().getType()); - auto tensorType1 = cast(countsOp.getCounts().getType()); - MemRefType resultType0 = MemRefType::get(tensorType0.getShape(), tensorType0.getElementType()); - MemRefType resultType1 = MemRefType::get(tensorType1.getShape(), tensorType1.getElementType()); - - Value allocVal0 = rewriter.create(loc, resultType0); - Value allocVal1 = rewriter.create(loc, resultType1); - rewriter.create(loc, nullptr, nullptr, countsOp.getObs(), allocVal0, allocVal1, - countsOp.getShotsAttr()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1}); - - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto countsOp = cast(op); + Location loc = op->getLoc(); + auto tensorType0 = cast(countsOp.getEigvals().getType()); + auto tensorType1 = cast(countsOp.getCounts().getType()); + MemRefType resultType0 = MemRefType::get(tensorType0.getShape(), tensorType0.getElementType()); + MemRefType resultType1 = MemRefType::get(tensorType1.getShape(), tensorType1.getElementType()); + + Value allocVal0 = rewriter.create(loc, resultType0); + Value allocVal1 = rewriter.create(loc, resultType1); + rewriter.create(loc, nullptr, nullptr, countsOp.getObs(), allocVal0, allocVal1, + countsOp.getShotsAttr()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1}); + + return success(); + } }; /// Bufferization of catalyst.quantum.set_state. Convert InState into memref. 
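 /// (Sketch: the InState tensor operand is replaced by a matching
 /// bufferization.to_memref result, e.g. a tensor<4xcomplex<f64>> state becomes
 /// a memref<4xcomplex<f64>>, and the op is recreated over that memref; the
 /// qubit operands and results are untouched.)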
struct SetStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return true; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto setStateOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(setStateOp.getInState().getType()); - MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - - auto toMemrefOp = rewriter.create(loc, memrefType, - setStateOp.getInState()); - auto memref = toMemrefOp.getResult(); - auto newSetStateOp = rewriter.create(loc, setStateOp.getOutQubits().getTypes(), - memref, setStateOp.getInQubits()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits()); - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto setStateOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(setStateOp.getInState().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + auto toMemrefOp = rewriter.create(loc, memrefType, + setStateOp.getInState()); + auto memref = toMemrefOp.getResult(); + auto newSetStateOp = rewriter.create(loc, setStateOp.getOutQubits().getTypes(), + memref, setStateOp.getInQubits()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits()); + return success(); + } }; /// Bufferization of catalyst.quantum.set_basic_state. Convert BasisState into memref. 
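 /// (Sketch: same pattern as set_state above, but applied to the BasisState
 /// operand, e.g. an i1 basis mask of type tensor<1xi1> becomes a memref<1xi1>.)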
struct SetBasisStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return false; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return true; - } - - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { - return {}; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { - auto setBasisStateOp = cast(op); - Location loc = op->getLoc(); - auto tensorType = cast(setBasisStateOp.getBasisState().getType()); - MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - - auto toMemrefOp = rewriter.create(loc, memrefType, - setBasisStateOp.getBasisState()); - auto memref = toMemrefOp.getResult(); - auto newSetStateOp = rewriter.create(loc, setBasisStateOp.getOutQubits().getTypes(), - memref, setBasisStateOp.getInQubits()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits()); - return success(); - } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto setBasisStateOp = cast(op); + Location loc = op->getLoc(); + auto tensorType = cast(setBasisStateOp.getBasisState().getType()); + MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + + auto toMemrefOp = rewriter.create(loc, memrefType, + setBasisStateOp.getBasisState()); + auto memref = toMemrefOp.getResult(); + auto newSetStateOp = rewriter.create(loc, setBasisStateOp.getOutQubits().getTypes(), + memref, setBasisStateOp.getInQubits()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits()); + return success(); + } }; } // namespace void catalyst::quantum::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, QuantumDialect *dialect) { - QubitUnitaryOp::attachInterface(*ctx); - HermitianOp::attachInterface(*ctx); - HamiltonianOp::attachInterface(*ctx); - SampleOp::attachInterface(*ctx); - StateOp::attachInterface(*ctx); - ProbsOp::attachInterface(*ctx); - CountsOp::attachInterface(*ctx); - SetStateOp::attachInterface(*ctx); - SetBasisStateOp::attachInterface(*ctx); - }); + registry.addExtension(+[](MLIRContext *ctx, QuantumDialect *dialect) { + QubitUnitaryOp::attachInterface(*ctx); + HermitianOp::attachInterface(*ctx); + HamiltonianOp::attachInterface(*ctx); + SampleOp::attachInterface(*ctx); + StateOp::attachInterface(*ctx); + ProbsOp::attachInterface(*ctx); + CountsOp::attachInterface(*ctx); + SetStateOp::attachInterface(*ctx); + SetBasisStateOp::attachInterface(*ctx); + }); } \ No newline at end of file From e6df804bf5d4fe5b7925a16aeaf403aba9f8c5a4 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 
12:03:41 -0400
Subject: [PATCH 038/183] Remove redundant include from QuantumDialect

---
 mlir/lib/Quantum/IR/QuantumDialect.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mlir/lib/Quantum/IR/QuantumDialect.cpp b/mlir/lib/Quantum/IR/QuantumDialect.cpp
index 04bfe34f2c..385f4e0ae5 100644
--- a/mlir/lib/Quantum/IR/QuantumDialect.cpp
+++ b/mlir/lib/Quantum/IR/QuantumDialect.cpp
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/IR/DialectImplementation.h" // needed for generated type parser
 
 #include "llvm/ADT/TypeSwitch.h" // needed for generated type parser
@@ -44,8 +43,6 @@ void QuantumDialect::initialize()
 #define GET_OP_LIST
 #include "Quantum/IR/QuantumOps.cpp.inc"
         >();
-
-    declarePromisedInterfaces();
 }
 
 //===----------------------------------------------------------------------===//

From 980b0dbfe24a7c7f854deebf3459d4cd01c7b961 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 29 Aug 2024 13:04:09 -0400
Subject: [PATCH 039/183] Fix the other indentations

---
 .../BufferizableOpInterfaceImpl.cpp           | 64 ++++++++++---------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp
index 3302b1ea92..f5101fe3e1 100644
--- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -11,45 +11,47 @@ using namespace catalyst;
 
 namespace {
 
-/// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref.
+/// Bufferization of catalyst.print. Get memref of printOp.val.
 struct PrintOpInterface
     : public bufferization::BufferizableOpInterface::ExternalModel<PrintOpInterface, PrintOp> {
-    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
-                                const bufferization::AnalysisState &state) const {
-        return true;
-    }
-
-    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
-                                 const bufferization::AnalysisState &state) const {
-        return false;
-    }
-
-    bufferization::AliasingValueList getAliasingValues(Operation *op,
-                                                       OpOperand &opOperand,
-                                                       const bufferization::AnalysisState &state) const {
-        return {};
-    }
-
-    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
-                            const bufferization::BufferizationOptions &options) const {
-        auto printOp = cast<PrintOp>(op);
-        if (printOp.getVal()) {
-            FailureOr<Value> source = getBuffer(rewriter, printOp.getVal(), options);
-            if (failed(source))
-                return failure();
-            bufferization::replaceOpWithNewBufferizedOp<PrintOp>(rewriter, op,
*source, + printOp.getConstValAttr(), printOp.getPrintDescriptorAttr()); + } + return success(); + } + }; } // namespace void catalyst::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { - PrintOp::attachInterface(*ctx); - }); + registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { + //CustomCallOp::attachInterface(*ctx); + PrintOp::attachInterface(*ctx); + }); } \ No newline at end of file From 01df77ea054400987a00859af96315d70b40cec9 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 13:30:15 -0400 Subject: [PATCH 040/183] Add CustomCall bufferization --- .../BufferizableOpInterfaceImpl.cpp | 67 ++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index f5101fe3e1..4e03cdebb7 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -43,7 +43,72 @@ struct PrintOpInterface } return success(); } +}; + +struct CustomCallOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto customCallOp = cast(op); + + // Add bufferized arguments + SmallVector bufferArgs; + ValueRange operands = customCallOp.getOperands(); + for (Value operand : operands) { + FailureOr opBuffer = getBuffer(rewriter, operand, options); + if (failed(opBuffer)) + return failure(); + bufferArgs.push_back(*opBuffer); + } + // Add bufferized return values to the arguments + ValueRange results = customCallOp.getResults(); + for (Value result : results) { + Type resultType = result.getType(); + RankedTensorType tensorType = dyn_cast(resultType); + if (!tensorType) { + return failure(); + } + auto options = bufferization::BufferizationOptions(); + FailureOr tensorAlloc = bufferization::allocateTensorForShapedValue( + rewriter, op->getLoc(), result, options, false); + MemRefType memrefType = + MemRefType::get(tensorType.getShape(), tensorType.getElementType()); + auto newBuffer = + rewriter.create(op->getLoc(), memrefType, *tensorAlloc); + bufferArgs.push_back(newBuffer); + } + + // Add the initial number of arguments + int32_t numArguments = static_cast(customCallOp.getNumOperands()); + DenseI32ArrayAttr numArgumentsDenseAttr = rewriter.getDenseI32ArrayAttr({numArguments}); + + // Create an updated custom call operation + rewriter.create(op->getLoc(), TypeRange{}, bufferArgs, + customCallOp.getCallTargetName(), numArgumentsDenseAttr); + size_t startIndex = bufferArgs.size() - customCallOp.getNumResults(); + SmallVector bufferResults(bufferArgs.begin() + startIndex, bufferArgs.end()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, bufferResults); + + return success(); + } }; } // namespace @@ -51,7 +116,7 @@ struct PrintOpInterface void 
catalyst::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { - //CustomCallOp::attachInterface(*ctx); + CustomCallOp::attachInterface(*ctx); PrintOp::attachInterface(*ctx); }); } \ No newline at end of file From 4a90eefa5fdce2e099f6602f81c95879fb78f8b4 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 13:34:46 -0400 Subject: [PATCH 041/183] Set write flag of custom call as false --- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 4e03cdebb7..a1cd08200d 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -55,7 +55,7 @@ struct CustomCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return true; + return false; } bufferization::AliasingValueList getAliasingValues(Operation *op, From c4547ae733202c12ca31f0f8571b3c0e228935f0 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 15:25:34 -0400 Subject: [PATCH 042/183] Tentatively add CallbackCallOp bufferization --- .../BufferizableOpInterfaceImpl.cpp | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index a1cd08200d..d6dc6b7800 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -45,6 +45,7 @@ struct PrintOpInterface } }; +/// Bufferization of catalyst.print. Mainly get buffers for arguments. struct CustomCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { @@ -111,6 +112,111 @@ struct CustomCallOpInterface } }; +struct CallbackOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto callbackOp = cast(op); + + // Only match here if we have all memref arguments and return values. + // Only match if we have result types. 
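+        // In that case the function type is rewritten into destination-passing
+        // style: the (memref) result types are appended to the argument types
+        // and the result list is emptied, e.g. (sketch)
+        //     (memref<2xf64>) -> memref<2xf64>
+        // becomes
+        //     (memref<2xf64>, memref<2xf64>) -> ()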
+ if (!llvm::any_of(callbackOp.getArgumentTypes(), [](Type argType) { return !isa(argType); }) && + !llvm::any_of(callbackOp.getResultTypes(),[](Type argType) { return !isa(argType); }) && + !callbackOp.getResultTypes().empty()) { + + auto argTys = callbackOp.getArgumentTypes(); + auto retTys = callbackOp.getResultTypes(); + SmallVector emptyRets; + SmallVector args(argTys.begin(), argTys.end()); + args.insert(args.end(), retTys.begin(), retTys.end()); + auto callbackTy = rewriter.getFunctionType(args, emptyRets); + rewriter.modifyOpInPlace(op, [&] { callbackOp.setFunctionType(callbackTy); }); + } + + return success(); + } +}; + +struct CallbackCallOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto callOp = cast(op); + + if(callOp->getNumResults() != callOp.getResultTypes().size()) + return failure(); + + SmallVector newInputs; + auto operands = callOp.getOperands(); + for (Value operand : operands) { + FailureOr opBuffer = getBuffer(rewriter, operand, options); + if (failed(opBuffer)) + return failure(); + newInputs.push_back(*opBuffer); + } + auto results = callOp.getResults(); + + auto loc = callOp->getLoc(); + SmallVector outmemrefs; + for (auto result : results) { + FailureOr tensorAlloc = + bufferization::allocateTensorForShapedValue(rewriter, loc, result, options, false); + if (failed(tensorAlloc)) + return failure(); + + auto tensor = *tensorAlloc; + RankedTensorType tensorTy = cast(tensor.getType()); + auto shape = tensorTy.getShape(); + auto elementTy = tensorTy.getElementType(); + auto memrefType = MemRefType::get(shape, elementTy); + auto toMemrefOp = rewriter.create(loc, memrefType, tensor); + auto memref = toMemrefOp.getResult(); + outmemrefs.push_back(memref); + newInputs.push_back(memref); + } + + SmallVector emptyRets; + //rewriter.create(loc, emptyRets, callOp.getCallee(), newInputs); + bufferization::replaceOpWithNewBufferizedOp(rewriter, op, emptyRets, callOp.getCallee(), newInputs); + /*bufferization::replaceOpWithBufferizedValues(rewriter, callOp, outmemrefs);*/ + return success(); + } +}; + } // namespace void catalyst::registerBufferizableOpInterfaceExternalModels( @@ -118,5 +224,7 @@ void catalyst::registerBufferizableOpInterfaceExternalModels( registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { CustomCallOp::attachInterface(*ctx); PrintOp::attachInterface(*ctx); + //CallbackOp::attachInterface(*ctx); + CallbackCallOp::attachInterface(*ctx); }); } \ No newline at end of file From 0c2eea843fbde391a2c009553d89b22c73e3e7b6 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 15:47:11 -0400 Subject: [PATCH 043/183] Still create a new type converter in CallbackCallOp pass --- .../Transforms/BufferizableOpInterfaceImpl.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 
d6dc6b7800..70df95336d 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,5 +1,6 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Transforms/DialectConversion.h" @@ -177,7 +178,13 @@ struct CallbackCallOpInterface const bufferization::BufferizationOptions &options) const { auto callOp = cast(op); - if(callOp->getNumResults() != callOp.getResultTypes().size()) + bufferization::BufferizeTypeConverter typeConverter; + + SmallVector convertedResults; + if (failed(typeConverter.convertTypes(callOp.getResultTypes(), convertedResults))) + return failure(); + + if(callOp->getNumResults() != convertedResults.size()) return failure(); SmallVector newInputs; @@ -210,9 +217,8 @@ struct CallbackCallOpInterface } SmallVector emptyRets; - //rewriter.create(loc, emptyRets, callOp.getCallee(), newInputs); - bufferization::replaceOpWithNewBufferizedOp(rewriter, op, emptyRets, callOp.getCallee(), newInputs); - /*bufferization::replaceOpWithBufferizedValues(rewriter, callOp, outmemrefs);*/ + bufferization::replaceOpWithNewBufferizedOp(rewriter, op, emptyRets, + callOp.getCallee(), newInputs); return success(); } }; @@ -224,7 +230,7 @@ void catalyst::registerBufferizableOpInterfaceExternalModels( registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { CustomCallOp::attachInterface(*ctx); PrintOp::attachInterface(*ctx); - //CallbackOp::attachInterface(*ctx); + CallbackOp::attachInterface(*ctx); CallbackCallOp::attachInterface(*ctx); }); } \ No newline at end of file From 71fa5d719252f48cb8756172d4711f5091bd070c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 29 Aug 2024 17:35:59 -0400 Subject: [PATCH 044/183] Use replaceOpWithBufferizedValues at the end of CallbackCallOp --- .../lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 70df95336d..7717b63f99 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -195,8 +195,8 @@ struct CallbackCallOpInterface return failure(); newInputs.push_back(*opBuffer); } - auto results = callOp.getResults(); + auto results = callOp.getResults(); auto loc = callOp->getLoc(); SmallVector outmemrefs; for (auto result : results) { @@ -217,8 +217,8 @@ struct CallbackCallOpInterface } SmallVector emptyRets; - bufferization::replaceOpWithNewBufferizedOp(rewriter, op, emptyRets, - callOp.getCallee(), newInputs); + rewriter.create(loc, emptyRets, callOp.getCallee(), newInputs); + bufferization::replaceOpWithBufferizedValues(rewriter, op, outmemrefs); return success(); } }; From 4295bfc994e5e8c7d1bd452db1065d9383437fee Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 30 Aug 2024 12:52:22 -0400 Subject: [PATCH 045/183] Add new passes to quantum-opt --- mlir/tools/quantum-opt/quantum-opt.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mlir/tools/quantum-opt/quantum-opt.cpp b/mlir/tools/quantum-opt/quantum-opt.cpp index b2c46933d5..96d9f022a9 100644 --- a/mlir/tools/quantum-opt/quantum-opt.cpp +++ b/mlir/tools/quantum-opt/quantum-opt.cpp @@ -26,12 +26,15 @@ #include 
"Catalyst/IR/CatalystDialect.h" #include "Catalyst/Transforms/Passes.h" +#include "Catalyst/Transforms/BufferizableOpInterfaceImpl.h" #include "Gradient/IR/GradientDialect.h" #include "Gradient/Transforms/Passes.h" +#include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" #include "Mitigation/IR/MitigationDialect.h" #include "Mitigation/Transforms/Passes.h" #include "Quantum/IR/QuantumDialect.h" #include "Quantum/Transforms/Passes.h" +#include "Quantum/Transforms/BufferizableOpInterfaceImpl.h" int main(int argc, char **argv) { @@ -50,6 +53,10 @@ int main(int argc, char **argv) registry.insert(); registry.insert(); + catalyst::registerBufferizableOpInterfaceExternalModels(registry); + catalyst::quantum::registerBufferizableOpInterfaceExternalModels(registry); + catalyst::gradient::registerBufferizableOpInterfaceExternalModels(registry); + return mlir::asMainReturnCode( mlir::MlirOptMain(argc, argv, "Quantum optimizer driver\n", registry)); } From c01dd1f019d970e1aea865430c98980dff4284fd Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 30 Aug 2024 18:05:34 -0400 Subject: [PATCH 046/183] Add AdjointOp bufferization --- .../BufferizableOpInterfaceImpl.cpp | 124 +++++++++++++++++- 1 file changed, 121 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index dee62249f4..69b15e890e 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,21 +1,139 @@ +#include "mlir/Conversion/LLVMCommon/MemRefBuilder.h" +#include "mlir/Conversion/LLVMCommon/Pattern.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Transforms/DialectConversion.h" #include "Gradient/IR/GradientOps.h" #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" +#include "Quantum/IR/QuantumOps.h" using namespace mlir; using namespace catalyst::gradient; namespace { +constexpr int64_t UNKNOWN = ShapedType::kDynamic; + +LLVM::LLVMFuncOp ensureFunctionDeclaration(RewriterBase &rewriter, Operation *op, + StringRef fnSymbol, Type fnType) +{ + Operation *fnDecl = SymbolTable::lookupNearestSymbolFrom(op, rewriter.getStringAttr(fnSymbol)); + + if (!fnDecl) { + RewriterBase::InsertionGuard insertGuard(rewriter); + ModuleOp mod = op->getParentOfType(); + rewriter.setInsertionPointToStart(mod.getBody()); + + fnDecl = rewriter.create(op->getLoc(), fnSymbol, fnType); + } + else { + assert(isa(fnDecl) && "QIR function declaration is not a LLVMFuncOp"); + } + + return cast(fnDecl); +} + +struct AdjointOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return false; + } + + bufferization::AliasingValueList getAliasingValues(Operation *op, + OpOperand &opOperand, + const bufferization::AnalysisState &state) const { + return {}; + } + + LogicalResult bufferize(Operation 
*op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const { + auto adjointOp = cast(op); + MLIRContext *ctx = rewriter.getContext(); + Location loc = op->getLoc(); + LLVMTypeConverter typeConverter(ctx); + Type vectorType = typeConverter.convertType(MemRefType::get({UNKNOWN}, Float64Type::get(ctx))); + + for (Type type : adjointOp.getResultTypes()) { + if (!isa(type)) + return adjointOp.emitOpError("must be bufferized before lowering"); + + // Currently only expval gradients are supported by the runtime, + // leading to tensor return values. + if (dyn_cast(type) != MemRefType::get({UNKNOWN}, Float64Type::get(ctx))) + return adjointOp.emitOpError("adjoint can only return MemRef or tuple thereof"); + } + + // The callee of the adjoint op must return as a single result the quantum register. + func::FuncOp callee = + SymbolTable::lookupNearestSymbolFrom(adjointOp, adjointOp.getCalleeAttr()); + assert(callee && callee.getNumResults() == 1 && "invalid qfunc symbol in adjoint op"); + + StringRef cacheFnName = "__catalyst__rt__toggle_recorder"; + StringRef gradFnName = "__catalyst__qis__Gradient"; + Type cacheFnSignature = + LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(ctx), IntegerType::get(ctx, 1)); + Type gradFnSignature = LLVM::LLVMFunctionType::get( + LLVM::LLVMVoidType::get(ctx), IntegerType::get(ctx, 64), /*isVarArg=*/true); + + LLVM::LLVMFuncOp cacheFnDecl = + ensureFunctionDeclaration(rewriter, adjointOp, cacheFnName, cacheFnSignature); + LLVM::LLVMFuncOp gradFnDecl = + ensureFunctionDeclaration(rewriter, adjointOp, gradFnName, gradFnSignature); + + // Run the forward pass and cache the circuit. + Value c_true = rewriter.create( + loc, rewriter.getIntegerAttr(IntegerType::get(ctx, 1), 1)); + Value c_false = rewriter.create( + loc, rewriter.getIntegerAttr(IntegerType::get(ctx, 1), 0)); + rewriter.create(loc, cacheFnDecl, c_true); + Value qreg = rewriter.create(loc, callee, adjointOp.getArgs()).getResult(0); + if (!isa(qreg.getType())) + return callee.emitOpError("qfunc must return quantum register"); + rewriter.create(loc, cacheFnDecl, c_false); + + // We follow the C ABI convention of passing result memrefs as struct pointers in the + // arguments to the C function, although in this case as a variadic argument list to allow + // for a varying number of results in a single signature. + Value c1 = rewriter.create(loc, rewriter.getI64IntegerAttr(1)); + Value numResults = rewriter.create( + loc, rewriter.getI64IntegerAttr(adjointOp.getDataIn().size())); + SmallVector args = {numResults}; + + // Might need to modify here. 
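+        // For each pre-allocated result buffer below: reserve a stack slot,
+        // store the memref descriptor into it, and pass the slot's pointer
+        // through the variadic argument list of the gradient runtime call.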
+ for (Value memref : adjointOp.getDataIn()) { + Value newArg = rewriter.create( + loc, LLVM::LLVMPointerType::get(rewriter.getContext()), vectorType, c1); + rewriter.create(loc, memref, newArg); + args.push_back(newArg); + } + + rewriter.create(loc, gradFnDecl, args); + rewriter.create(loc, qreg); + mlir::bufferization::replaceOpWithNewBufferizedOp(rewriter, adjointOp); + + return success(); + } +}; + } // namespace void catalyst::gradient::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { - registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { - - }); + registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { + AdjointOp::attachInterface(*ctx); + }); } \ No newline at end of file From 45581c75360b735d8249739558fd97f29df1523f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 30 Aug 2024 18:13:05 -0400 Subject: [PATCH 047/183] Use BufferizeTypeConverter in new pass --- mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 69b15e890e..1dfb518e5e 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -4,6 +4,7 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -64,7 +65,7 @@ struct AdjointOpInterface auto adjointOp = cast(op); MLIRContext *ctx = rewriter.getContext(); Location loc = op->getLoc(); - LLVMTypeConverter typeConverter(ctx); + bufferization::BufferizeTypeConverter typeConverter; Type vectorType = typeConverter.convertType(MemRefType::get({UNKNOWN}, Float64Type::get(ctx))); for (Type type : adjointOp.getResultTypes()) { From 9ba2cd9cb12c36c985519634cc335ef10aa47ee7 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 3 Sep 2024 17:34:50 -0400 Subject: [PATCH 048/183] Make CallbackOp use Unstructured Control Flow Bufferizable OpInterface --- mlir/lib/Catalyst/IR/CatalystDialect.cpp | 3 ++ .../BufferizableOpInterfaceImpl.cpp | 50 +++++++++++++------ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/mlir/lib/Catalyst/IR/CatalystDialect.cpp b/mlir/lib/Catalyst/IR/CatalystDialect.cpp index 20df887830..a94a9658c4 100644 --- a/mlir/lib/Catalyst/IR/CatalystDialect.cpp +++ b/mlir/lib/Catalyst/IR/CatalystDialect.cpp @@ -14,6 +14,7 @@ #include "Catalyst/IR/CatalystDialect.h" #include "Catalyst/IR/CatalystOps.h" +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/IR/Builders.h" #include "mlir/IR/DialectImplementation.h" // needed for generated type parser #include "mlir/Interfaces/FunctionImplementation.h" @@ -40,6 +41,8 @@ void CatalystDialect::initialize() #define GET_OP_LIST #include "Catalyst/IR/CatalystOps.cpp.inc" >(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 7717b63f99..f29cc6fe41 100644 --- 
a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,5 +1,6 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Transforms/DialectConversion.h" @@ -114,8 +115,24 @@ struct CustomCallOpInterface }; struct CallbackOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< + CallbackOpInterface, CallbackOp> { + + static bool supportsUnstructuredControlFlow() { return true; } + + bool hasTensorSemantics(Operation *op) const { + auto isaTensor = llvm::IsaPred; + + // A function has tensor semantics if it has tensor arguments/results. + auto callbackOp = cast(op); + bool hasTensorArg = any_of(callbackOp.getArgumentTypes(), isaTensor); + bool hasTensorResult = any_of(callbackOp.getResultTypes(), isaTensor); + if (hasTensorArg || hasTensorResult) + return true; + + return false; + } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { return true; @@ -136,20 +153,21 @@ struct CallbackOpInterface const bufferization::BufferizationOptions &options) const { auto callbackOp = cast(op); - // Only match here if we have all memref arguments and return values. - // Only match if we have result types. - if (!llvm::any_of(callbackOp.getArgumentTypes(), [](Type argType) { return !isa(argType); }) && - !llvm::any_of(callbackOp.getResultTypes(),[](Type argType) { return !isa(argType); }) && - !callbackOp.getResultTypes().empty()) { - - auto argTys = callbackOp.getArgumentTypes(); - auto retTys = callbackOp.getResultTypes(); - SmallVector emptyRets; - SmallVector args(argTys.begin(), argTys.end()); - args.insert(args.end(), retTys.begin(), retTys.end()); - auto callbackTy = rewriter.getFunctionType(args, emptyRets); - rewriter.modifyOpInPlace(op, [&] { callbackOp.setFunctionType(callbackTy); }); - } + auto argTys = callbackOp.getArgumentTypes(); + auto retTys = callbackOp.getResultTypes(); + SmallVector emptyRets; + SmallVector args(argTys.begin(), argTys.end()); + args.insert(args.end(), retTys.begin(), retTys.end()); + SmallVector bufferArgs; + for (Type ty : args) { + auto tensorType = dyn_cast(ty); + if (!tensorType) + bufferArgs.push_back(ty); + else + bufferArgs.push_back(MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + } + auto callbackTy = rewriter.getFunctionType(bufferArgs, emptyRets); + rewriter.modifyOpInPlace(op, [&] { callbackOp.setFunctionType(callbackTy); }); return success(); } From 58a0bed0b4e5789f255fc21d9d9ee849b3618a1a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 3 Sep 2024 17:50:58 -0400 Subject: [PATCH 049/183] Add tentative patch for moduleOp bufferization --- mlir/patches/moduleOp-bufferization.patch | 153 ++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 mlir/patches/moduleOp-bufferization.patch diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch new file mode 100644 index 0000000000..aaedd050bd --- /dev/null +++ b/mlir/patches/moduleOp-bufferization.patch @@ -0,0 +1,153 @@ +diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp 
b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
+index 0a4072605c26..5b1536a59e11 100644
+--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
++++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
+@@ -75,7 +75,7 @@ using namespace mlir::bufferization;
+ using namespace mlir::bufferization::func_ext;
+ 
+ /// A mapping of FuncOps to their callers.
+-using FuncCallerMap = DenseMap<func::FuncOp, DenseSet<Operation *>>;
++using FuncCallerMap = DenseMap<FunctionOpInterface, DenseSet<Operation *>>;
+ 
+ /// Get or create FuncAnalysisState.
+ static FuncAnalysisState &
+@@ -247,6 +247,15 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) {
+       SymbolTable::lookupNearestSymbolFrom(callOp, sym));
+ }
+ 
++static FunctionOpInterface getCalledFunction(CallOpInterface callOp) {
++  SymbolRefAttr sym =
++      llvm::dyn_cast_if_present<SymbolRefAttr>(callOp.getCallableForCallee());
++  if (!sym)
++    return nullptr;
++  return dyn_cast_or_null<FunctionOpInterface>(
++      SymbolTable::lookupNearestSymbolFrom(callOp, sym));
++}
++
+ /// Gather equivalence info of CallOps.
+ /// Note: This only adds new equivalence info if the called function was already
+ /// analyzed.
+@@ -277,11 +286,15 @@ static void equivalenceAnalysis(func::FuncOp funcOp,
+ }
+ 
+ /// Return "true" if the given function signature has tensor semantics.
+-static bool hasTensorSignature(func::FuncOp funcOp) {
+-  return llvm::any_of(funcOp.getFunctionType().getInputs(),
++static bool hasTensorSignature(FunctionOpInterface funcOp) {
++  return llvm::any_of(funcOp.getArgumentTypes(),
+                       llvm::IsaPred<TensorType>) ||
+-         llvm::any_of(funcOp.getFunctionType().getResults(),
++         llvm::any_of(funcOp.getResultTypes(),
+                       llvm::IsaPred<TensorType>);
++  /*return llvm::any_of(funcOp.getFunctionType().getInputs(),
++                      llvm::IsaPred<TensorType>) ||
++         llvm::any_of(funcOp.getFunctionType().getResults(),
++                      llvm::IsaPred<TensorType>);*/
+ }
+ 
+ /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by
+@@ -291,14 +304,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) {
+ /// retrieve the called FuncOp from any func::CallOp.
+ static LogicalResult
+ getFuncOpsOrderedByCalls(ModuleOp moduleOp,
+-                         SmallVectorImpl<func::FuncOp> &orderedFuncOps,
++                         SmallVectorImpl<FunctionOpInterface> &orderedFuncOps,
+                          FuncCallerMap &callerMap) {
+   // For each FuncOp, the set of functions called by it (i.e. the union of
+   // symbols of all nested func::CallOp).
+-  DenseMap<func::FuncOp, DenseSet<func::FuncOp>> calledBy;
++  DenseMap<FunctionOpInterface, DenseSet<FunctionOpInterface>> calledBy;
+   // For each FuncOp, the number of func::CallOp it contains.
+-  DenseMap<func::FuncOp, unsigned> numberCallOpsContainedInFuncOp;
+-  WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult {
++  DenseMap<FunctionOpInterface, unsigned> numberCallOpsContainedInFuncOp;
++  WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOpIn) -> WalkResult {
++    if(isa<func::FuncOp>(funcOpIn)) {
++    func::FuncOp funcOp = cast<func::FuncOp>(funcOpIn);
+     if (!funcOp.getBody().empty()) {
+       func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
+       if (!returnOp)
+@@ -306,11 +321,12 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp,
+            << "cannot bufferize a FuncOp with tensors and "
+               "without a unique ReturnOp";
+     }
++    }
+ 
+     // Collect function calls and populate the caller map.
+-    numberCallOpsContainedInFuncOp[funcOp] = 0;
+-    return funcOp.walk([&](func::CallOp callOp) -> WalkResult {
+-      func::FuncOp calledFunction = getCalledFunction(callOp);
++    numberCallOpsContainedInFuncOp[funcOpIn] = 0;
++    return funcOpIn.walk([&](CallOpInterface callOp) -> WalkResult {
++      FunctionOpInterface calledFunction = getCalledFunction(callOp);
+       assert(calledFunction && "could not retrieved called func::FuncOp");
+       // If the called function does not have any tensors in its signature, then
+       // it is not necessary to bufferize the callee before the caller.
+@@ -318,8 +334,8 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp,
+         return WalkResult::skip();
+ 
+       callerMap[calledFunction].insert(callOp);
+-      if (calledBy[calledFunction].insert(funcOp).second) {
+-        numberCallOpsContainedInFuncOp[funcOp]++;
++      if (calledBy[calledFunction].insert(funcOpIn).second) {
++        numberCallOpsContainedInFuncOp[funcOpIn]++;
+       }
+       return WalkResult::advance();
+     });
+@@ -379,7 +395,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp,
+   FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state);
+ 
+   // A list of functions in the order in which they are analyzed + bufferized.
+-  SmallVector<func::FuncOp> orderedFuncOps;
++  SmallVector<FunctionOpInterface> orderedFuncOps;
+ 
+   // A mapping of FuncOps to their callers.
+   FuncCallerMap callerMap;
+@@ -388,7 +404,10 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp,
+     return failure();
+ 
+   // Analyze ops.
+-  for (func::FuncOp funcOp : orderedFuncOps) {
++  for (FunctionOpInterface funcOpIn : orderedFuncOps) {
++    if(!isa<func::FuncOp>(funcOpIn))
+       continue;
++    func::FuncOp funcOp = cast<func::FuncOp>(funcOpIn);
+     if (!state.getOptions().isOpAllowed(funcOp))
+       continue;
+@@ -430,20 +449,20 @@ LogicalResult mlir::bufferization::bufferizeModuleOp(
+   IRRewriter rewriter(moduleOp.getContext());
+ 
+   // A list of functions in the order in which they are analyzed + bufferized.
+-  SmallVector<func::FuncOp> orderedFuncOps;
++  SmallVector<FunctionOpInterface> orderedFuncOps;
+ 
+   // A mapping of FuncOps to their callers.
+   FuncCallerMap callerMap;
+ 
+   if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap)))
+     return failure();
++  SmallVector ops;
+ 
+   // Bufferize functions.
+-  for (func::FuncOp funcOp : orderedFuncOps) {
++  for (FunctionOpInterface funcOp : orderedFuncOps) {
+     // Note: It would be good to apply cleanups here but we cannot as aliasInfo
+     // would be invalidated.
+-
+-    if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) {
++    if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) {
+       // This function was not analyzed and RaW conflicts were not resolved.
+       // Buffer copies must be inserted before every write.
+       OneShotBufferizationOptions updatedOptions = options;
+@@ -456,8 +475,8 @@ LogicalResult mlir::bufferization::bufferizeModuleOp(
+     }
+ 
+     // Change buffer return types to more precise layout maps.
+-    if (options.inferFunctionResultLayout)
+-      foldMemRefCasts(funcOp);
++    if (options.inferFunctionResultLayout && isa<func::FuncOp>(funcOp))
++      foldMemRefCasts(cast<func::FuncOp>(funcOp));
+   }
+ 
+   // Bufferize all other ops.
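The crux of the patch above is that one-shot module bufferization stops keying its traversal on the concrete func::FuncOp and instead walks the FunctionOpInterface, which is what allows custom function-like ops such as Catalyst's CallbackOp to be ordered and bufferized alongside ordinary functions. A minimal standalone sketch of that traversal pattern, assuming an LLVM checkout where FunctionOpInterface is declared in mlir/Interfaces/FunctionInterfaces.h; the helper name collectFunctionLikeOps is illustrative and not part of the patch:

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Collect every function-like op in a module, whether it is a func::FuncOp,
// an LLVM::LLVMFuncOp, or a custom op such as catalyst.callback, by walking
// the interface rather than a concrete op type.
static SmallVector<FunctionOpInterface> collectFunctionLikeOps(ModuleOp moduleOp)
{
    SmallVector<FunctionOpInterface> funcOps;
    moduleOp.walk([&](FunctionOpInterface funcOp) {
        // Checks that only make sense for func.func (e.g. the unique-ReturnOp
        // requirement) can still be gated on isa<func::FuncOp>(funcOp),
        // exactly as the patch does.
        funcOps.push_back(funcOp);
    });
    return funcOps;
}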
From f0a10bf13489c2a1072cd34bd8191b2150791d49 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 3 Sep 2024 18:03:13 -0400 Subject: [PATCH 050/183] Update patch script --- mlir/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlir/Makefile b/mlir/Makefile index 1d5a126ef6..c553d3274b 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -12,6 +12,8 @@ ENZYME_BUILD_DIR?=$(MK_DIR)/Enzyme/build RT_BUILD_DIR?=$(MK_DIR)/../runtime/build ENABLE_ASAN?=OFF BUILD_TYPE?=Release +LLVM_TARGET_FILE=$(MK_DIR)/llvm-project/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +LLVM_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -54,6 +56,9 @@ all: llvm mhlo enzyme dialects .PHONY: llvm llvm: @echo "build LLVM and MLIR enabling Python bindings" + @if patch --dry-run -p1 -N $(LLVM_TARGET_FILE) $(LLVM_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 $(LLVM_TARGET_FILE) $(LLVM_PATCH_FILE); \ + fi cmake -G Ninja -S llvm-project/llvm -B $(LLVM_BUILD_DIR) \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DLLVM_BUILD_EXAMPLES=OFF \ From 03bee24efc3598de24d20596007b8fc4bfe32ced Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 4 Sep 2024 09:59:41 -0400 Subject: [PATCH 051/183] Reformatting --- .../Transforms/BufferizableOpInterfaceImpl.h | 2 - mlir/lib/Catalyst/IR/CatalystDialect.cpp | 4 +- .../BufferizableOpInterfaceImpl.cpp | 119 +++++----- .../BufferizableOpInterfaceImpl.cpp | 37 +-- .../BufferizableOpInterfaceImpl.cpp | 216 ++++++++++-------- mlir/tools/quantum-opt/quantum-opt.cpp | 6 +- 6 files changed, 220 insertions(+), 164 deletions(-) diff --git a/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h index 9c0e982cb0..e56c914ba4 100644 --- a/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h @@ -4,8 +4,6 @@ using namespace mlir; namespace catalyst { - void registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry ®istry); - } // namespace catalyst \ No newline at end of file diff --git a/mlir/lib/Catalyst/IR/CatalystDialect.cpp b/mlir/lib/Catalyst/IR/CatalystDialect.cpp index a94a9658c4..158330521d 100644 --- a/mlir/lib/Catalyst/IR/CatalystDialect.cpp +++ b/mlir/lib/Catalyst/IR/CatalystDialect.cpp @@ -41,8 +41,8 @@ void CatalystDialect::initialize() #define GET_OP_LIST #include "Catalyst/IR/CatalystOps.cpp.inc" >(); - declarePromisedInterfaces(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index f29cc6fe41..fff38f63c2 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -15,33 +15,36 @@ namespace { /// Bufferization of catalyst.print. Get memref of printOp.val. 
struct PrintOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto printOp = cast(op); if (printOp.getVal()) { FailureOr source = getBuffer(rewriter, printOp.getVal(), options); if (failed(source)) return failure(); - bufferization::replaceOpWithNewBufferizedOp(rewriter, op, *source, - printOp.getConstValAttr(), printOp.getPrintDescriptorAttr()); + bufferization::replaceOpWithNewBufferizedOp( + rewriter, op, *source, printOp.getConstValAttr(), printOp.getPrintDescriptorAttr()); } return success(); } @@ -50,25 +53,29 @@ struct PrintOpInterface /// Bufferization of catalyst.print. Mainly get buffers for arguments. struct CustomCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + CustomCallOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto customCallOp = cast(op); // Add bufferized arguments @@ -99,7 +106,7 @@ struct CustomCallOpInterface bufferArgs.push_back(newBuffer); } - // Add the initial number of arguments + // Add the initial number of arguments int32_t numArguments = static_cast(customCallOp.getNumOperands()); DenseI32ArrayAttr numArgumentsDenseAttr = rewriter.getDenseI32ArrayAttr({numArguments}); @@ -116,11 +123,12 @@ struct CustomCallOpInterface struct CallbackOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - CallbackOpInterface, CallbackOp> { + CallbackOpInterface, CallbackOp> { static bool supportsUnstructuredControlFlow() { return true; } - bool hasTensorSemantics(Operation *op) const { + bool hasTensorSemantics(Operation *op) const + { auto isaTensor = llvm::IsaPred; // A function has tensor semantics if it has tensor arguments/results. 
@@ -134,40 +142,45 @@ struct CallbackOpInterface } bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto callbackOp = cast(op); - auto argTys = callbackOp.getArgumentTypes(); - auto retTys = callbackOp.getResultTypes(); - SmallVector emptyRets; - SmallVector args(argTys.begin(), argTys.end()); - args.insert(args.end(), retTys.begin(), retTys.end()); - SmallVector bufferArgs; - for (Type ty : args) { - auto tensorType = dyn_cast(ty); - if (!tensorType) - bufferArgs.push_back(ty); - else - bufferArgs.push_back(MemRefType::get(tensorType.getShape(), tensorType.getElementType())); - } - auto callbackTy = rewriter.getFunctionType(bufferArgs, emptyRets); - rewriter.modifyOpInPlace(op, [&] { callbackOp.setFunctionType(callbackTy); }); + auto argTys = callbackOp.getArgumentTypes(); + auto retTys = callbackOp.getResultTypes(); + SmallVector emptyRets; + SmallVector args(argTys.begin(), argTys.end()); + args.insert(args.end(), retTys.begin(), retTys.end()); + SmallVector bufferArgs; + for (Type ty : args) { + auto tensorType = dyn_cast(ty); + if (!tensorType) + bufferArgs.push_back(ty); + else + bufferArgs.push_back( + MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + } + auto callbackTy = rewriter.getFunctionType(bufferArgs, emptyRets); + rewriter.modifyOpInPlace(op, [&] { callbackOp.setFunctionType(callbackTy); }); return success(); } @@ -175,25 +188,29 @@ struct CallbackOpInterface struct CallbackCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + CallbackCallOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto callOp = cast(op); bufferization::BufferizeTypeConverter typeConverter; @@ -202,7 +219,7 @@ struct CallbackCallOpInterface if (failed(typeConverter.convertTypes(callOp.getResultTypes(), convertedResults))) return failure(); - if(callOp->getNumResults() != convertedResults.size()) + if (callOp->getNumResults() != 
convertedResults.size()) return failure(); SmallVector newInputs; @@ -243,8 +260,8 @@ struct CallbackCallOpInterface } // namespace -void catalyst::registerBufferizableOpInterfaceExternalModels( - DialectRegistry ®istry) { +void catalyst::registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) +{ registry.addExtension(+[](MLIRContext *ctx, CatalystDialect *dialect) { CustomCallOp::attachInterface(*ctx); PrintOp::attachInterface(*ctx); diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 1dfb518e5e..d1155da8e7 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,7 +1,6 @@ #include "mlir/Conversion/LLVMCommon/MemRefBuilder.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" -#include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" @@ -42,31 +41,35 @@ LLVM::LLVMFuncOp ensureFunctionDeclaration(RewriterBase &rewriter, Operation *op } struct AdjointOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto adjointOp = cast(op); MLIRContext *ctx = rewriter.getContext(); Location loc = op->getLoc(); bufferization::BufferizeTypeConverter typeConverter; - Type vectorType = typeConverter.convertType(MemRefType::get({UNKNOWN}, Float64Type::get(ctx))); + Type vectorType = + typeConverter.convertType(MemRefType::get({UNKNOWN}, Float64Type::get(ctx))); for (Type type : adjointOp.getResultTypes()) { if (!isa(type)) @@ -75,12 +78,13 @@ struct AdjointOpInterface // Currently only expval gradients are supported by the runtime, // leading to tensor return values. if (dyn_cast(type) != MemRefType::get({UNKNOWN}, Float64Type::get(ctx))) - return adjointOp.emitOpError("adjoint can only return MemRef or tuple thereof"); + return adjointOp.emitOpError( + "adjoint can only return MemRef or tuple thereof"); } // The callee of the adjoint op must return as a single result the quantum register. 
- func::FuncOp callee = - SymbolTable::lookupNearestSymbolFrom(adjointOp, adjointOp.getCalleeAttr()); + func::FuncOp callee = SymbolTable::lookupNearestSymbolFrom( + adjointOp, adjointOp.getCalleeAttr()); assert(callee && callee.getNumResults() == 1 && "invalid qfunc symbol in adjoint op"); StringRef cacheFnName = "__catalyst__rt__toggle_recorder"; @@ -124,7 +128,8 @@ struct AdjointOpInterface rewriter.create(loc, gradFnDecl, args); rewriter.create(loc, qreg); - mlir::bufferization::replaceOpWithNewBufferizedOp(rewriter, adjointOp); + mlir::bufferization::replaceOpWithNewBufferizedOp( + rewriter, adjointOp); return success(); } @@ -132,8 +137,8 @@ struct AdjointOpInterface } // namespace -void catalyst::gradient::registerBufferizableOpInterfaceExternalModels( - DialectRegistry ®istry) { +void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) +{ registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { AdjointOp::attachInterface(*ctx); }); diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 8e09a014eb..08689c18a9 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -14,38 +14,43 @@ namespace { /// Bufferization of catalyst.quantum.unitary. Convert Matrix into memref. struct QubitUnitaryOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + QubitUnitaryOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto qubitUnitaryOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(qubitUnitaryOp.getMatrix().getType()); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - qubitUnitaryOp.getMatrix()); + auto toMemrefOp = + rewriter.create(loc, memrefType, qubitUnitaryOp.getMatrix()); auto memref = toMemrefOp.getResult(); auto newQubitUnitaryOp = rewriter.create( - loc, qubitUnitaryOp.getOutQubits().getTypes(), - qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, - qubitUnitaryOp.getInQubits(), qubitUnitaryOp.getAdjointAttr(), - qubitUnitaryOp.getInCtrlQubits(), qubitUnitaryOp.getInCtrlValues()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, newQubitUnitaryOp.getOutQubits()); + loc, qubitUnitaryOp.getOutQubits().getTypes(), + qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, qubitUnitaryOp.getInQubits(), + qubitUnitaryOp.getAdjointAttr(), qubitUnitaryOp.getInCtrlQubits(), + qubitUnitaryOp.getInCtrlValues()); + bufferization::replaceOpWithBufferizedValues(rewriter, op, + newQubitUnitaryOp.getOutQubits()); return 
success(); } @@ -54,31 +59,35 @@ struct QubitUnitaryOpInterface /// Bufferization of catalyst.quantum.hermitian. Convert Matrix into memref. struct HermitianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + HermitianOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto hermitianOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(hermitianOp.getMatrix().getType()); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - hermitianOp.getMatrix()); + auto toMemrefOp = + rewriter.create(loc, memrefType, hermitianOp.getMatrix()); auto memref = toMemrefOp.getResult(); auto newHermitianOp = rewriter.create(loc, hermitianOp.getType(), memref, hermitianOp.getQubits()); @@ -91,34 +100,38 @@ struct HermitianOpInterface /// Bufferization of catalyst.quantum.hamiltonian. Convert Matrix into memref. struct HamiltonianOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + HamiltonianOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto hamiltonianOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(hamiltonianOp.getCoeffs().getType()); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - hamiltonianOp.getCoeffs()); + auto toMemrefOp = + rewriter.create(loc, memrefType, hamiltonianOp.getCoeffs()); auto memref = toMemrefOp.getResult(); auto newHamiltonianOp = rewriter.create(loc, hamiltonianOp.getType(), memref, - hamiltonianOp.getTerms()); + hamiltonianOp.getTerms()); bufferization::replaceOpWithBufferizedValues(rewriter, op, newHamiltonianOp.getObs()); return success(); @@ -128,26 +141,29 @@ struct HamiltonianOpInterface /// Bufferization of catalyst.quantum.sample. 
Replace with memref.alloc and a new /// catalyst.quantum.sample that uses the memory allocated by memref.alloc. struct SampleOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto sampleOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(sampleOp.getSamples().getType()); @@ -165,26 +181,29 @@ struct SampleOpInterface /// Bufferization of catalyst.quantum.state. Replace with memref.alloc and a new /// catalyst.quantum.state that uses the memory allocated by memref.alloc. struct StateOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto stateOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(stateOp.getState().getType()); @@ -201,26 +220,29 @@ struct StateOpInterface /// Bufferization of catalyst.quantum.probs. Replace with memref.alloc and a new /// catalyst.quantum.probs that uses the memory allocated by memref.alloc. 
struct ProbsOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto probsOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(probsOp.getProbabilities().getType()); @@ -237,38 +259,44 @@ struct ProbsOpInterface /// Bufferization of catalyst.quantum.counts. Replace with memref.allocs and a new /// catalyst.quantum.counts that uses the memory allocated by memref.allocs. struct CountsOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto countsOp = cast(op); Location loc = op->getLoc(); auto tensorType0 = cast(countsOp.getEigvals().getType()); auto tensorType1 = cast(countsOp.getCounts().getType()); - MemRefType resultType0 = MemRefType::get(tensorType0.getShape(), tensorType0.getElementType()); - MemRefType resultType1 = MemRefType::get(tensorType1.getShape(), tensorType1.getElementType()); + MemRefType resultType0 = + MemRefType::get(tensorType0.getShape(), tensorType0.getElementType()); + MemRefType resultType1 = + MemRefType::get(tensorType1.getShape(), tensorType1.getElementType()); Value allocVal0 = rewriter.create(loc, resultType0); Value allocVal1 = rewriter.create(loc, resultType1); rewriter.create(loc, nullptr, nullptr, countsOp.getObs(), allocVal0, allocVal1, countsOp.getShotsAttr()); - bufferization::replaceOpWithBufferizedValues(rewriter, op, ValueRange{allocVal0, allocVal1}); + bufferization::replaceOpWithBufferizedValues(rewriter, op, + ValueRange{allocVal0, allocVal1}); return success(); } @@ -277,32 +305,36 @@ struct CountsOpInterface /// Bufferization of catalyst.quantum.set_state. Convert InState into memref. 
struct SetStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + SetStateOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto setStateOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(setStateOp.getInState().getType()); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - setStateOp.getInState()); + auto toMemrefOp = + rewriter.create(loc, memrefType, setStateOp.getInState()); auto memref = toMemrefOp.getResult(); auto newSetStateOp = rewriter.create(loc, setStateOp.getOutQubits().getTypes(), memref, setStateOp.getInQubits()); @@ -314,35 +346,39 @@ struct SetStateOpInterface /// Bufferization of catalyst.quantum.set_basic_state. Convert BasisState into memref. struct SetBasisStateOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { + SetBasisStateOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return false; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return true; } - bufferization::AliasingValueList getAliasingValues(Operation *op, - OpOperand &opOperand, - const bufferization::AnalysisState &state) const { + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { return {}; } LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - const bufferization::BufferizationOptions &options) const { + const bufferization::BufferizationOptions &options) const + { auto setBasisStateOp = cast(op); Location loc = op->getLoc(); auto tensorType = cast(setBasisStateOp.getBasisState().getType()); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - auto toMemrefOp = rewriter.create(loc, memrefType, - setBasisStateOp.getBasisState()); + auto toMemrefOp = rewriter.create( + loc, memrefType, setBasisStateOp.getBasisState()); auto memref = toMemrefOp.getResult(); - auto newSetStateOp = rewriter.create(loc, setBasisStateOp.getOutQubits().getTypes(), - memref, setBasisStateOp.getInQubits()); + auto newSetStateOp = rewriter.create( + loc, setBasisStateOp.getOutQubits().getTypes(), memref, setBasisStateOp.getInQubits()); bufferization::replaceOpWithBufferizedValues(rewriter, op, newSetStateOp.getOutQubits()); return success(); } @@ -350,8 +386,8 @@ struct SetBasisStateOpInterface } // namespace -void 
catalyst::quantum::registerBufferizableOpInterfaceExternalModels( - DialectRegistry ®istry) { +void catalyst::quantum::registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) +{ registry.addExtension(+[](MLIRContext *ctx, QuantumDialect *dialect) { QubitUnitaryOp::attachInterface(*ctx); HermitianOp::attachInterface(*ctx); diff --git a/mlir/tools/quantum-opt/quantum-opt.cpp b/mlir/tools/quantum-opt/quantum-opt.cpp index 96d9f022a9..aa11589368 100644 --- a/mlir/tools/quantum-opt/quantum-opt.cpp +++ b/mlir/tools/quantum-opt/quantum-opt.cpp @@ -25,16 +25,16 @@ #include "mhlo/IR/hlo_ops.h" #include "Catalyst/IR/CatalystDialect.h" -#include "Catalyst/Transforms/Passes.h" #include "Catalyst/Transforms/BufferizableOpInterfaceImpl.h" +#include "Catalyst/Transforms/Passes.h" #include "Gradient/IR/GradientDialect.h" -#include "Gradient/Transforms/Passes.h" #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" +#include "Gradient/Transforms/Passes.h" #include "Mitigation/IR/MitigationDialect.h" #include "Mitigation/Transforms/Passes.h" #include "Quantum/IR/QuantumDialect.h" -#include "Quantum/Transforms/Passes.h" #include "Quantum/Transforms/BufferizableOpInterfaceImpl.h" +#include "Quantum/Transforms/Passes.h" int main(int argc, char **argv) { From ba85b20f636d9e84419e28d54b1ad889a863776d Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 4 Sep 2024 13:10:45 -0400 Subject: [PATCH 052/183] Update llvm patch --- mlir/patches/moduleOp-bufferization.patch | 96 ++++++++++++++--------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch index aaedd050bd..c9d15d3c5f 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/moduleOp-bufferization.patch @@ -1,5 +1,5 @@ diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp -index 0a4072605c26..5b1536a59e11 100644 +index 0a4072605c26..2983af0fcbf3 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; @@ -27,7 +27,7 @@ index 0a4072605c26..5b1536a59e11 100644 /// Gather equivalence info of CallOps. /// Note: This only adds new equivalence info if the called function was already /// analyzed. -@@ -277,11 +286,15 @@ static void equivalenceAnalysis(func::FuncOp funcOp, +@@ -277,10 +286,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. @@ -39,14 +39,9 @@ index 0a4072605c26..5b1536a59e11 100644 - llvm::any_of(funcOp.getFunctionType().getResults(), + llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred); -+ /*return llvm::any_of(funcOp.getFunctionType().getInputs(), -+ llvm::IsaPred) || -+ llvm::any_of(funcOp.getFunctionType().getResults(), -+ llvm::IsaPred);*/ } - /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by -@@ -291,14 +304,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { +@@ -291,26 +300,30 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, @@ -60,41 +55,36 @@ index 0a4072605c26..5b1536a59e11 100644 // For each FuncOp, the number of func::CallOp it contains. 
- DenseMap numberCallOpsContainedInFuncOp; - WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { +- if (!funcOp.getBody().empty()) { +- func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); +- if (!returnOp) +- return funcOp->emitError() +- << "cannot bufferize a FuncOp with tensors and " +- "without a unique ReturnOp"; + DenseMap numberCallOpsContainedInFuncOp; -+ WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOpIn) -> WalkResult { -+ if(isa(funcOpIn)) { -+ func::FuncOp funcOp = cast(funcOpIn); - if (!funcOp.getBody().empty()) { - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); - if (!returnOp) -@@ -306,11 +321,12 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, - << "cannot bufferize a FuncOp with tensors and " - "without a unique ReturnOp"; ++ WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { ++ // Only handle ReturnOp if funcOp is exactly the FuncOp type. ++ if(isa(funcOp)) { ++ FuncOp funcOpCasted = cast(funcOp); ++ if (!funcOpCasted.getBody().empty()) { ++ func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOpCasted); ++ if (!returnOp) ++ return funcOp->emitError() ++ << "cannot bufferize a FuncOp with tensors and " ++ "without a unique ReturnOp"; ++ } } -+ } // Collect function calls and populate the caller map. -- numberCallOpsContainedInFuncOp[funcOp] = 0; + numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](func::CallOp callOp) -> WalkResult { - func::FuncOp calledFunction = getCalledFunction(callOp); -+ numberCallOpsContainedInFuncOp[funcOpIn] = 0; -+ return funcOpIn.walk([&](CallOpInterface callOp) -> WalkResult { ++ return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + FunctionOpInterface calledFunction = getCalledFunction(callOp); assert(calledFunction && "could not retrieved called func::FuncOp"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. -@@ -318,8 +334,8 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, - return WalkResult::skip(); - - callerMap[calledFunction].insert(callOp); -- if (calledBy[calledFunction].insert(funcOp).second) { -- numberCallOpsContainedInFuncOp[funcOp]++; -+ if (calledBy[calledFunction].insert(funcOpIn).second) { -+ numberCallOpsContainedInFuncOp[funcOpIn]++; - } - return WalkResult::advance(); - }); -@@ -379,7 +395,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -379,7 +392,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. @@ -103,18 +93,48 @@ index 0a4072605c26..5b1536a59e11 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -388,7 +404,10 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -388,27 +401,33 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (func::FuncOp funcOp : orderedFuncOps) { -+ for (FunctionOpInterface funcOpIn : orderedFuncOps) { -+ if(!isa(funcOpIn)) +- if (!state.getOptions().isOpAllowed(funcOp)) ++ for (FunctionOpInterface funcOp : orderedFuncOps) { ++ ++ // The following analysis is specific to the FuncOp type. ++ if(!isa(funcOp)) + continue; -+ func::FuncOp funcOp = cast(funcOpIn); - if (!state.getOptions().isOpAllowed(funcOp)) ++ FuncOp funcOpCasted = cast(funcOp); ++ ++ if (!state.getOptions().isOpAllowed(funcOpCasted)) continue; + // Now analyzing function. 
+- funcState.startFunctionAnalysis(funcOp); ++ funcState.startFunctionAnalysis(funcOpCasted); + + // Gather equivalence info for CallOps. +- equivalenceAnalysis(funcOp, state, funcState); ++ equivalenceAnalysis(funcOpCasted, state, funcState); + + // Analyze funcOp. +- if (failed(analyzeOp(funcOp, state, statistics))) ++ if (failed(analyzeOp(funcOpCasted, state, statistics))) + return failure(); + + // Run some extra function analyses. +- if (failed(aliasingFuncOpBBArgsAnalysis(funcOp, state, funcState)) || +- failed(funcOpBbArgReadWriteAnalysis(funcOp, state, funcState))) ++ if (failed(aliasingFuncOpBBArgsAnalysis(funcOpCasted, state, funcState)) || ++ failed(funcOpBbArgReadWriteAnalysis(funcOpCasted, state, funcState))) + return failure(); + + // Mark op as fully analyzed. +- funcState.analyzedFuncOps[funcOp] = FuncOpAnalysisState::Analyzed; ++ funcState.analyzedFuncOps[funcOpCasted] = FuncOpAnalysisState::Analyzed; + } + + return success(); @@ -430,20 +449,20 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); From 1c730d2aa52ce0abf6ffb7c9da22b6f8fa15205d Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 4 Sep 2024 13:15:19 -0400 Subject: [PATCH 053/183] Stick with old llvm version for now --- .dep-versions | 4 ++-- mlir/llvm-project | 2 +- mlir/mlir-hlo | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.dep-versions b/.dep-versions index d66a0df75e..25e038fc10 100644 --- a/.dep-versions +++ b/.dep-versions @@ -1,7 +1,7 @@ # Always update the version check in catalyst.__init__ when changing the JAX version. jax=0.4.28 -mhlo=39c37c43fb9db18144f2e155a0fe65864646a968 -llvm=6f2c61071c274a1b5e212e6ad4114641ec7c7fc3 +mhlo=89a891c986650c33df76885f5620e0a92150d90f +llvm=3a8316216807d64a586b971f51695e23883331f7 enzyme=v0.0.130 # Always remove custom PL/LQ versions before release. 
diff --git a/mlir/llvm-project b/mlir/llvm-project
index 6f2c61071c..3a83162168 160000
--- a/mlir/llvm-project
+++ b/mlir/llvm-project
@@ -1 +1 @@
-Subproject commit 6f2c61071c274a1b5e212e6ad4114641ec7c7fc3
+Subproject commit 3a8316216807d64a586b971f51695e23883331f7
diff --git a/mlir/mlir-hlo b/mlir/mlir-hlo
index 39c37c43fb..89a891c986 160000
--- a/mlir/mlir-hlo
+++ b/mlir/mlir-hlo
@@ -1 +1 @@
-Subproject commit 39c37c43fb9db18144f2e155a0fe65864646a968
+Subproject commit 89a891c986650c33df76885f5620e0a92150d90f

From 31c9139bb43134d5a5b757dd8237da818a91d466 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 4 Sep 2024 13:28:29 -0400
Subject: [PATCH 054/183] Roll back LLVM fix

---
 mlir/lib/Driver/CompilerDriver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp
index 67533ad2ef..004fa92c7f 100644
--- a/mlir/lib/Driver/CompilerDriver.cpp
+++ b/mlir/lib/Driver/CompilerDriver.cpp
@@ -673,7 +673,7 @@ LogicalResult QuantumDriverMain(const CompilerOptions &options, CompilerOutput &
 if (options.lowerToLLVM) {
 llvmModule = timer::timer(translateModuleToLLVMIR, "translateModuleToLLVMIR",
- /* add_endl */ false, *op, llvmContext, "LLVMDialectModule", false);
+ /* add_endl */ false, *op, llvmContext, "LLVMDialectModule");
 if (!llvmModule) {
 CO_MSG(options, Verbosity::Urgent, "Failed to translate LLVM module\n");
 return failure();

From 525422b71b3a48b0ca26de4dcd363b902f9956b7 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 4 Sep 2024 13:32:31 -0400
Subject: [PATCH 055/183] Reformatting

---
 frontend/catalyst/compiler.py | 20 ++++++++++----------
 mlir/lib/Driver/CompilerDriver.cpp | 5 ++---
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 85d3172db6..91df7f9716 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -195,12 +195,12 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
 [
 "eliminate-empty-tensors",
 "one-shot-bufferize{bufferize-function-boundaries}",
- #"func.func(buffer-hoisting)",
- #"func.func(buffer-loop-hoisting)",
- #"buffer-results-to-out-params",
- #"drop-equivalent-buffer-results",
- #"func.func(promote-buffers-to-stack)",
- #"buffer-deallocation-pipeline",
+ # "func.func(buffer-hoisting)",
+ # "func.func(buffer-loop-hoisting)",
+ # "buffer-results-to-out-params",
+ # "drop-equivalent-buffer-results",
+ # "func.func(promote-buffers-to-stack)",
+ # "buffer-deallocation-pipeline",
 ],
 )

@@ -256,8 +256,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
 HLO_LOWERING_PASS,
 QUANTUM_COMPILATION_PASS,
 BUFFERIZATION_PASS,
- #BUFFERIZATION_PASS2,
- #BUFFERIZATION_PASS3,
+ # BUFFERIZATION_PASS2,
+ # BUFFERIZATION_PASS3,
 MLIR_TO_LLVM_PASS,
 ]

@@ -273,8 +273,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
 HLO_LOWERING_PASS,
 QUANTUM_COMPILATION_PASS,
 BUFFERIZATION_PASS,
- #BUFFERIZATION_PASS2,
- #BUFFERIZATION_PASS3,
+ # BUFFERIZATION_PASS2,
+ # BUFFERIZATION_PASS3,
 MLIR_TO_LLVM_ASYNC_PASS,
 ]

diff --git a/mlir/lib/Driver/CompilerDriver.cpp b/mlir/lib/Driver/CompilerDriver.cpp
index 004fa92c7f..74340385a6 100644
--- a/mlir/lib/Driver/CompilerDriver.cpp
+++ b/mlir/lib/Driver/CompilerDriver.cpp
@@ -671,9 +671,8 @@ LogicalResult QuantumDriverMain(const CompilerOptions &options, CompilerOutput &
 outIRStream << *op;

 if (options.lowerToLLVM) {
- llvmModule =
-
timer::timer(translateModuleToLLVMIR, "translateModuleToLLVMIR", - /* add_endl */ false, *op, llvmContext, "LLVMDialectModule"); + llvmModule = timer::timer(translateModuleToLLVMIR, "translateModuleToLLVMIR", + /* add_endl */ false, *op, llvmContext, "LLVMDialectModule"); if (!llvmModule) { CO_MSG(options, Verbosity::Urgent, "Failed to translate LLVM module\n"); return failure(); From d7c236d7e31c947319ccc5dd80cab2d9202f6182 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 4 Sep 2024 15:49:18 -0400 Subject: [PATCH 056/183] Update patch --- mlir/patches/moduleOp-bufferization.patch | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch index c9d15d3c5f..75a8a65639 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/moduleOp-bufferization.patch @@ -1,5 +1,5 @@ diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp -index 0a4072605c26..2983af0fcbf3 100644 +index 0a4072605c26..5231fe860553 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; @@ -135,7 +135,7 @@ index 0a4072605c26..2983af0fcbf3 100644 } return success(); -@@ -430,20 +449,20 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -430,7 +449,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. @@ -144,17 +144,15 @@ index 0a4072605c26..2983af0fcbf3 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; - - if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) +@@ -439,11 +458,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); -+ SmallVector ops; // Bufferize functions. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. -- + - if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) { + if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) { // This function was not analyzed and RaW conflicts were not resolved. From ac4fc89b623bdc17482b606eeca07ee0479c4282 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 4 Sep 2024 16:48:59 -0400 Subject: [PATCH 057/183] Correct AdjointOpInterface --- mlir/lib/Gradient/IR/GradientDialect.cpp | 2 + .../BufferizableOpInterfaceImpl.cpp | 80 ++++--------------- 2 files changed, 17 insertions(+), 65 deletions(-) diff --git a/mlir/lib/Gradient/IR/GradientDialect.cpp b/mlir/lib/Gradient/IR/GradientDialect.cpp index 4d9cfddb00..74f7d05aa7 100644 --- a/mlir/lib/Gradient/IR/GradientDialect.cpp +++ b/mlir/lib/Gradient/IR/GradientDialect.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Transforms/InliningUtils.h" #include "Gradient/IR/GradientDialect.h" @@ -50,6 +51,7 @@ void GradientDialect::initialize() #include "Gradient/IR/GradientOps.cpp.inc" >(); addInterface(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index d1155da8e7..15c2090abd 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -64,73 +64,23 @@ struct AdjointOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { - auto adjointOp = cast(op); - MLIRContext *ctx = rewriter.getContext(); - Location loc = op->getLoc(); - bufferization::BufferizeTypeConverter typeConverter; - Type vectorType = - typeConverter.convertType(MemRefType::get({UNKNOWN}, Float64Type::get(ctx))); - - for (Type type : adjointOp.getResultTypes()) { - if (!isa(type)) - return adjointOp.emitOpError("must be bufferized before lowering"); - - // Currently only expval gradients are supported by the runtime, - // leading to tensor return values. - if (dyn_cast(type) != MemRefType::get({UNKNOWN}, Float64Type::get(ctx))) - return adjointOp.emitOpError( - "adjoint can only return MemRef or tuple thereof"); + llvm::outs() << "Happens?\n"; + SmallVector resTypes; + if (failed(getTypeConverter()->convertTypes(op.getResultTypes(), resTypes))) + return failure(); + + Location loc = op.getLoc(); + Value gradSize = op.getGradSize(); + SmallVector memrefValues; + for (Type resType : resTypes) { + MemRefType memrefType = cast(resType); + Value memrefValue = rewriter.create(loc, memrefType, gradSize); + memrefValues.push_back(memrefValue); } - // The callee of the adjoint op must return as a single result the quantum register. - func::FuncOp callee = SymbolTable::lookupNearestSymbolFrom( - adjointOp, adjointOp.getCalleeAttr()); - assert(callee && callee.getNumResults() == 1 && "invalid qfunc symbol in adjoint op"); - - StringRef cacheFnName = "__catalyst__rt__toggle_recorder"; - StringRef gradFnName = "__catalyst__qis__Gradient"; - Type cacheFnSignature = - LLVM::LLVMFunctionType::get(LLVM::LLVMVoidType::get(ctx), IntegerType::get(ctx, 1)); - Type gradFnSignature = LLVM::LLVMFunctionType::get( - LLVM::LLVMVoidType::get(ctx), IntegerType::get(ctx, 64), /*isVarArg=*/true); - - LLVM::LLVMFuncOp cacheFnDecl = - ensureFunctionDeclaration(rewriter, adjointOp, cacheFnName, cacheFnSignature); - LLVM::LLVMFuncOp gradFnDecl = - ensureFunctionDeclaration(rewriter, adjointOp, gradFnName, gradFnSignature); - - // Run the forward pass and cache the circuit. 
- Value c_true = rewriter.create(
- loc, rewriter.getIntegerAttr(IntegerType::get(ctx, 1), 1));
- Value c_false = rewriter.create(
- loc, rewriter.getIntegerAttr(IntegerType::get(ctx, 1), 0));
- rewriter.create(loc, cacheFnDecl, c_true);
- Value qreg = rewriter.create(loc, callee, adjointOp.getArgs()).getResult(0);
- if (!isa(qreg.getType()))
- return callee.emitOpError("qfunc must return quantum register");
- rewriter.create(loc, cacheFnDecl, c_false);
-
- // We follow the C ABI convention of passing result memrefs as struct pointers in the
- // arguments to the C function, although in this case as a variadic argument list to allow
- // for a varying number of results in a single signature.
- Value c1 = rewriter.create(loc, rewriter.getI64IntegerAttr(1));
- Value numResults = rewriter.create(
- loc, rewriter.getI64IntegerAttr(adjointOp.getDataIn().size()));
- SmallVector<Value> args = {numResults};
-
- // Might need to modify here.
- for (Value memref : adjointOp.getDataIn()) {
- Value newArg = rewriter.create(
- loc, LLVM::LLVMPointerType::get(rewriter.getContext()), vectorType, c1);
- rewriter.create(loc, memref, newArg);
- args.push_back(newArg);
- }
-
- rewriter.create(loc, gradFnDecl, args);
- rewriter.create(loc, qreg);
- mlir::bufferization::replaceOpWithNewBufferizedOp(
- rewriter, adjointOp);
-
+ rewriter.create(loc, TypeRange{}, op.getCalleeAttr(), adaptor.getGradSize(),
+ adaptor.getArgs(), memrefValues);
+ bufferization::replaceOpWithBufferizedValues(rewriter, op, memrefValues);
 return success();
 }
 };

From 4428eec98b1ec6ecc349ac865e0bab4c988595ef Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 4 Sep 2024 17:03:21 -0400
Subject: [PATCH 058/183] Fix bufferization in AdjointOp

---
 .../BufferizableOpInterfaceImpl.cpp | 46 ++++++++++++----------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
index 15c2090abd..4e95bd0373 100644
--- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -19,27 +19,6 @@ using namespace catalyst::gradient;

 namespace {

-constexpr int64_t UNKNOWN = ShapedType::kDynamic;
-
-LLVM::LLVMFuncOp ensureFunctionDeclaration(RewriterBase &rewriter, Operation *op,
- StringRef fnSymbol, Type fnType)
-{
- Operation *fnDecl = SymbolTable::lookupNearestSymbolFrom(op, rewriter.getStringAttr(fnSymbol));
-
- if (!fnDecl) {
- RewriterBase::InsertionGuard insertGuard(rewriter);
- ModuleOp mod = op->getParentOfType<ModuleOp>();
- rewriter.setInsertionPointToStart(mod.getBody());
-
- fnDecl = rewriter.create<LLVM::LLVMFuncOp>(op->getLoc(), fnSymbol, fnType);
- }
- else {
- assert(isa<LLVM::LLVMFuncOp>(fnDecl) && "QIR function declaration is not a LLVMFuncOp");
- }
-
- return cast<LLVM::LLVMFuncOp>(fnDecl);
-}
-
 struct AdjointOpInterface
 : public bufferization::BufferizableOpInterface::ExternalModel<AdjointOpInterface, AdjointOp> {
 bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
@@ -65,12 +44,17 @@ struct AdjointOpInterface
 const bufferization::BufferizationOptions &options) const
 {
 llvm::outs() << "Happens?\n";
+
+ auto adjointOp = cast<AdjointOp>(op);
+
+ bufferization::BufferizeTypeConverter typeConverter;
+
 SmallVector<Type> resTypes;
- if (failed(getTypeConverter()->convertTypes(op.getResultTypes(), resTypes)))
+ if (failed(typeConverter.convertTypes(adjointOp.getResultTypes(), resTypes)))
 return failure();

- Location loc = op.getLoc();
- Value gradSize = op.getGradSize();
+ Location loc = adjointOp.getLoc();
+ Value gradSize = adjointOp.getGradSize();
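+ // BufferizeTypeConverter maps each tensor result type to the equivalent
+ // memref type (e.g. tensor<?xf64> -> memref<?xf64>), so every entry of
+ // resTypes below is a MemRefType and yields one allocation per result.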
SmallVector memrefValues; for (Type resType : resTypes) { MemRefType memrefType = cast(resType); @@ -78,8 +62,18 @@ struct AdjointOpInterface memrefValues.push_back(memrefValue); } - rewriter.create(loc, TypeRange{}, op.getCalleeAttr(), adaptor.getGradSize(), - adaptor.getArgs(), memrefValues); + SmallVector bufferArgs; + ValueRange operands = adjointOp.getArgs(); + for (Value operand : operands) { + FailureOr opBuffer = getBuffer(rewriter, operand, options); + if (failed(opBuffer)) + return failure(); + bufferArgs.push_back(*opBuffer); + } + + + rewriter.create(loc, TypeRange{}, adjointOp.getCalleeAttr(), adjointOp.getGradSize(), + bufferArgs, memrefValues); bufferization::replaceOpWithBufferizedValues(rewriter, op, memrefValues); return success(); } From dde8e6849631a54726f9aec158e16a8537421747 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 5 Sep 2024 11:37:35 -0400 Subject: [PATCH 059/183] Add backpropOp bufferization --- mlir/lib/Gradient/IR/GradientDialect.cpp | 3 +- .../BufferizableOpInterfaceImpl.cpp | 135 +++++++++++++++++- 2 files changed, 134 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Gradient/IR/GradientDialect.cpp b/mlir/lib/Gradient/IR/GradientDialect.cpp index 74f7d05aa7..c80b1b5eb6 100644 --- a/mlir/lib/Gradient/IR/GradientDialect.cpp +++ b/mlir/lib/Gradient/IR/GradientDialect.cpp @@ -51,7 +51,8 @@ void GradientDialect::initialize() #include "Gradient/IR/GradientOps.cpp.inc" >(); addInterface(); - declarePromisedInterfaces(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 4e95bd0373..50393fc416 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -5,6 +5,7 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" @@ -12,6 +13,7 @@ #include "Gradient/IR/GradientOps.h" #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" +#include "Gradient/Utils/GradientShape.h" #include "Quantum/IR/QuantumOps.h" using namespace mlir; @@ -19,12 +21,42 @@ using namespace catalyst::gradient; namespace { +Value generateAllocation(OpBuilder &builder, Location loc, Value reference) +{ + auto memrefType = cast(reference.getType()); + // Get dynamic dimension sizes from the provided reference value if necessary. + SmallVector dynamicDims; + if (!memrefType.hasStaticShape()) { + for (int64_t dim = 0; dim < memrefType.getRank(); dim++) { + if (memrefType.isDynamicDim(dim)) { + Value dimIndex = builder.create(loc, dim); + dynamicDims.push_back(builder.create(loc, reference, dimIndex)); + } + } + } + + return builder.create(loc, memrefType, dynamicDims); +} + +/// Helper function to generate a set of memref allocations. +/// +/// The allocation size and shape is deduced from a list of existing memref values. 
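+/// For instance, passing a BackpropOp's cotangent values produces one fresh
+/// allocation per cotangent, each matching that cotangent's shape and type.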
+/// +void generateAllocations(RewriterBase &rewriter, Location loc, + SmallVectorImpl &allocations, ValueRange referenceValues) +{ + for (Value memref : referenceValues) { + allocations.push_back( + generateAllocation(rewriter, loc, cast>(memref))); + } +} + struct AdjointOpInterface : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, @@ -43,8 +75,6 @@ struct AdjointOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { - llvm::outs() << "Happens?\n"; - auto adjointOp = cast(op); bufferization::BufferizeTypeConverter typeConverter; @@ -79,11 +109,110 @@ struct AdjointOpInterface } }; +struct BackpropOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return true; + } + + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const + { + auto backpropOp = cast(op); + + Location loc = backpropOp.getLoc(); + SmallVector gradients; + SmallVector argShadows; + // Conceptually a map from scalar result indices (w.r.t. other scalars) to the position in + // the overall list of returned gradients. + // For instance, a backprop op that returns (tensor, f64, tensor, f64, f64) will have + // scalarIndices = {1, 3, 4}. + SmallVector scalarIndices; + SmallVector scalarReturnTypes; + std::vector diffArgs = + computeDiffArgs(backpropOp.getArgs(), backpropOp.getDiffArgIndicesAttr()); + for (const auto &[idx, diffArg] : llvm::enumerate(diffArgs)) { + // Allocate buffers to place the differentiation results (gradients) into. Enzyme refers + // to these as shadow arguments. There is one result for each differentiable MemRef + // argument, with a matching shape and type. + if (isa(diffArg.getType())) { + Value shadow = generateAllocation(rewriter, loc, diffArg); + gradients.push_back(shadow); + argShadows.push_back(shadow); + } + else if (isa(diffArg.getType())) { + scalarReturnTypes.push_back(diffArg.getType()); + scalarIndices.push_back(idx); + // Put a null placeholder value that will be filled in with the result of the + // bufferized BackpropOp. + gradients.push_back(Value()); + } + } + + // Enzyme requires buffers for the primal outputs as well, even though we don't need their + // values. We'll mark them dupNoNeed later on to allow Enzyme to optimize away their + // computation. + SmallVector calleeResults, resShadows; + ValueRange cotangents = backpropOp.getCotangents(); + generateAllocations(rewriter, loc, calleeResults, cotangents); + // Enzyme mutates the result shadows but the cotangent tensors must be immutable, so we + // create copies to pass into Enzyme. Concretely, this issue pops up with multiple + // BackpropOps that have the same cotangent tensor due to a CSE effect from one-shot + // bufferization. 
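+ // (Without fresh copies, Enzyme would accumulate gradients into a buffer
+ // that another BackpropOp may still need to read as an intact cotangent.)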
+ generateAllocations(rewriter, loc, resShadows, cotangents); + for (const auto &[cotangent, resShadow] : llvm::zip(cotangents, resShadows)) { + rewriter.create(loc, cotangent, resShadow); + } + + DenseIntElementsAttr diffArgIndicesAttr = backpropOp.getDiffArgIndices().value_or(nullptr); + auto bufferizedBackpropOp = rewriter.create( + loc, TypeRange{}, scalarReturnTypes, backpropOp.getCalleeAttr(), backpropOp.getArgs(), argShadows, + calleeResults, resShadows, diffArgIndicesAttr, backpropOp.getKeepValueResultsAttr()); + + // Fill in the null placeholders. + for (const auto &[idx, scalarResult] : + llvm::enumerate(bufferizedBackpropOp.getGradients())) { + gradients[scalarIndices[idx]] = scalarResult; + } + + // BackpropOp can return two results for value_and_grad: values and gradients + // or only one for grad: gradients + SmallVector results; + { + // If we are lowering a value_and_grad operation, then take values from the + // calleeResults + if (!backpropOp.getVals().empty()) { + results.insert(results.end(), calleeResults.begin(), calleeResults.end()); + } + results.insert(results.end(), gradients.begin(), gradients.end()); + } + + rewriter.replaceOp(op, results); + return success(); + } +}; + } // namespace void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { AdjointOp::attachInterface(*ctx); + BackpropOp::attachInterface(*ctx); }); } \ No newline at end of file From af0da0c7af39022adbd3d12dee82eb88fbf7579a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 5 Sep 2024 11:48:59 -0400 Subject: [PATCH 060/183] Add restrict to ToTensorOp --- .../lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp | 4 ++-- .../Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp | 4 ++-- mlir/test/Gradient/PS_QuantumGradientTest.mlir | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp b/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp index 8cbf83224c..e84a619576 100644 --- a/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp +++ b/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp @@ -147,7 +147,7 @@ func::FuncOp genSplitPreprocessed(PatternRewriter &rewriter, Location loc, func: PatternRewriter::InsertionGuard insertGuard(rewriter); rewriter.setInsertionPointToStart(&splitFn.getBody().front()); Value paramsBuffer = rewriter.create(loc, paramsBufferType, paramCount); - Value paramsTensor = rewriter.create(loc, paramsBuffer); + Value paramsTensor = rewriter.create(loc, paramsBuffer, /*restrict=*/true); qnodeQuantumArgs.push_back(paramsTensor); MemRefType paramsProcessedType = MemRefType::get({}, rewriter.getIndexType()); @@ -290,7 +290,7 @@ func::FuncOp genArgMapFunction(PatternRewriter &rewriter, Location loc, func::Fu PatternRewriter::InsertionGuard insertionGuard(rewriter); rewriter.setInsertionPoint(returnOp); Value paramsVector = - rewriter.create(loc, paramsVectorType, paramsBuffer); + rewriter.create(loc, paramsVectorType, paramsBuffer, /*restrict=*/true) ; returnOp.getOperandsMutable().assign(paramsVector); } }); diff --git a/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp b/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp index 15f005e464..e0a1670b9a 100644 --- a/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp +++ b/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp @@ -58,7 +58,7 @@ static 
std::vector computePartialDerivative(PatternRewriter &rewriter, Lo { constexpr double shift = PI / 2; ShapedType shiftVectorType = RankedTensorType::get({numShifts}, rewriter.getF64Type()); - Value selectorVector = rewriter.create(loc, selectorBuffer); + Value selectorVector = rewriter.create(loc, selectorBuffer, /*restrict=*/true); // Define the shift vectors (pos/neg) as sparse tensor constants. DenseElementsAttr nonZeroIndices = rewriter.getI64TensorAttr(currentShift); @@ -285,7 +285,7 @@ func::FuncOp ParameterShiftLowering::genQGradFunction(PatternRewriter &rewriter, gradientTensors.reserve(gradResTypes.size()); for (Value gradientBuffer : gradientBuffers) { gradientTensors.push_back( - rewriter.create(loc, gradientBuffer)); + rewriter.create(loc, gradientBuffer, /*restrict=*/true)); } op->setOperands(gradientTensors); } diff --git a/mlir/test/Gradient/PS_QuantumGradientTest.mlir b/mlir/test/Gradient/PS_QuantumGradientTest.mlir index c13ca339d4..8c8034981b 100644 --- a/mlir/test/Gradient/PS_QuantumGradientTest.mlir +++ b/mlir/test/Gradient/PS_QuantumGradientTest.mlir @@ -425,7 +425,7 @@ func.func @multi_res_circuit(%arg0: f64) -> (f64, tensor<2xf64>) attributes {qno %r = quantum.alloc(1) : !quantum.reg %q_0 = quantum.extract %r[%idx] : !quantum.reg -> !quantum.bit - // CHECK: [[SEL:%.+]] = bufferization.to_tensor [[SELBUFF]] : memref<0xindex> + // CHECK: [[SEL:%.+]] = bufferization.to_tensor [[SELBUFF]] restrict : memref<0xindex> // CHECK: [[EVALPOS:%.+]]:2 = call @multi_res_circuit.shifted(%arg0, [[SHIFTPOS]], [[SEL]]) : {{.+}} -> (f64, tensor<2xf64>) // CHECK: [[EVALNEG:%.+]]:2 = call @multi_res_circuit.shifted(%arg0, [[SHIFTNEG]], [[SEL]]) : {{.+}} -> (f64, tensor<2xf64>) // CHECK: [[DIFF0:%.+]] = arith.subf [[EVALPOS]]#0, [[EVALNEG]]#0 From cae09e05b40ec20b8a0f3d24c4f20686dbd75ca9 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 5 Sep 2024 14:46:27 -0400 Subject: [PATCH 061/183] Correct bufferized args in backpropOp --- .../BufferizableOpInterfaceImpl.cpp | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 50393fc416..cc9b403139 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -104,7 +104,7 @@ struct AdjointOpInterface rewriter.create(loc, TypeRange{}, adjointOp.getCalleeAttr(), adjointOp.getGradSize(), bufferArgs, memrefValues); - bufferization::replaceOpWithBufferizedValues(rewriter, op, memrefValues); + bufferization::replaceOpWithBufferizedValues(rewriter, op, memrefValues); return success(); } }; @@ -144,8 +144,24 @@ struct BackpropOpInterface // scalarIndices = {1, 3, 4}. SmallVector scalarIndices; SmallVector scalarReturnTypes; + + SmallVector bufferArgs; + ValueRange operands = backpropOp.getArgs(); + for (Value operand : operands) { + if(isa(operand.getType())) { + FailureOr opBuffer = getBuffer(rewriter, operand, options); + if (failed(opBuffer)) + return failure(); + bufferArgs.push_back(*opBuffer); + } else { + bufferArgs.push_back(operand); + } + + } + std::vector diffArgs = - computeDiffArgs(backpropOp.getArgs(), backpropOp.getDiffArgIndicesAttr()); + computeDiffArgs(bufferArgs, backpropOp.getDiffArgIndicesAttr()); + for (const auto &[idx, diffArg] : llvm::enumerate(diffArgs)) { // Allocate buffers to place the differentiation results (gradients) into. Enzyme refers // to these as shadow arguments. 
There is one result for each differentiable MemRef @@ -169,21 +185,33 @@ struct BackpropOpInterface // computation. SmallVector calleeResults, resShadows; ValueRange cotangents = backpropOp.getCotangents(); - generateAllocations(rewriter, loc, calleeResults, cotangents); + SmallVector bufferCotangentsList; + for (Value operand : cotangents) { + FailureOr opBuffer = getBuffer(rewriter, operand, options); + if (failed(opBuffer)) + return failure(); + bufferCotangentsList.push_back(*opBuffer); + } + mlir::ValueRange bufferCotangents(bufferCotangentsList); + + generateAllocations(rewriter, loc, calleeResults, bufferCotangents); // Enzyme mutates the result shadows but the cotangent tensors must be immutable, so we // create copies to pass into Enzyme. Concretely, this issue pops up with multiple // BackpropOps that have the same cotangent tensor due to a CSE effect from one-shot // bufferization. - generateAllocations(rewriter, loc, resShadows, cotangents); - for (const auto &[cotangent, resShadow] : llvm::zip(cotangents, resShadows)) { + generateAllocations(rewriter, loc, resShadows, bufferCotangents); + for (const auto &[cotangent, resShadow] : llvm::zip(bufferCotangents, resShadows)) { rewriter.create(loc, cotangent, resShadow); } + + llvm::outs() << "======================\n"; + llvm::outs() << scalarReturnTypes; + llvm::outs() << "======================\n"; DenseIntElementsAttr diffArgIndicesAttr = backpropOp.getDiffArgIndices().value_or(nullptr); auto bufferizedBackpropOp = rewriter.create( - loc, TypeRange{}, scalarReturnTypes, backpropOp.getCalleeAttr(), backpropOp.getArgs(), argShadows, + loc, TypeRange{}, scalarReturnTypes, backpropOp.getCalleeAttr(), bufferArgs, argShadows, calleeResults, resShadows, diffArgIndicesAttr, backpropOp.getKeepValueResultsAttr()); - // Fill in the null placeholders. 
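 // (scalarIndices, built above, records where each scalar gradient belongs
 // among the buffer gradients that were allocated as shadows.)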
for (const auto &[idx, scalarResult] : llvm::enumerate(bufferizedBackpropOp.getGradients())) { @@ -202,7 +230,7 @@ struct BackpropOpInterface results.insert(results.end(), gradients.begin(), gradients.end()); } - rewriter.replaceOp(op, results); + bufferization::replaceOpWithBufferizedValues(rewriter, op, results); return success(); } }; From 1182aca021d6dd182ac37f792dad6202273ae782 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 12:02:56 -0400 Subject: [PATCH 062/183] Use identity-layout-map --- frontend/catalyst/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 91df7f9716..3333eb4e9b 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -194,7 +194,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "BufferizationPass", [ "eliminate-empty-tensors", - "one-shot-bufferize{bufferize-function-boundaries}", + "one-shot-bufferize{bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map}", # "func.func(buffer-hoisting)", # "func.func(buffer-loop-hoisting)", # "buffer-results-to-out-params", From 67d1d6a5a2f9f1d70dfd06bf96b9148f7c58fe60 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 12:07:25 -0400 Subject: [PATCH 063/183] Update llvm patch with new ReturnLike mechanism --- mlir/patches/moduleOp-bufferization.patch | 335 +++++++++++++++++----- 1 file changed, 261 insertions(+), 74 deletions(-) diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch index 75a8a65639..51cc79536e 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/moduleOp-bufferization.patch @@ -1,5 +1,53 @@ +diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h +index 0b91d3d675b7..8bed0dfc5814 100644 +--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h ++++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h +@@ -50,24 +50,24 @@ struct FuncAnalysisState : public OneShotAnalysisState::Extension { + + /// A mapping of ReturnOp OpOperand indices to equivalent FuncOp BBArg + /// indices. +- DenseMap equivalentFuncArgs; ++ DenseMap equivalentFuncArgs; + + /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. +- DenseMap aliasingReturnVals; ++ DenseMap aliasingReturnVals; + + /// A set of all read BlockArguments of FuncOps. +- DenseMap readBbArgs; ++ DenseMap readBbArgs; + + /// A set of all written-to BlockArguments of FuncOps. +- DenseMap writtenBbArgs; ++ DenseMap writtenBbArgs; + + /// Keep track of which FuncOps are fully analyzed or currently being + /// analyzed. +- DenseMap analyzedFuncOps; ++ DenseMap analyzedFuncOps; + + /// This function is called right before analyzing the given FuncOp. It + /// initializes the data structures for the FuncOp in this state object. 
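++ /// (All of the maps above now key on FunctionOpInterface; the interface
++ /// hashes via its underlying Operation*, so lookups behave as before.)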
+- void startFunctionAnalysis(FuncOp funcOp); ++ void startFunctionAnalysis(FunctionOpInterface funcOp); + }; + + void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); +diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +index 053ea7935260..fed1c49d22be 100644 +--- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp ++++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +@@ -22,7 +22,7 @@ namespace mlir { + namespace bufferization { + namespace func_ext { + +-void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) { ++void FuncAnalysisState::startFunctionAnalysis(FunctionOpInterface funcOp) { + analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress; + auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); + auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp -index 0a4072605c26..5231fe860553 100644 +index 0a4072605c26..bd054ac4e7b8 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; @@ -11,23 +59,148 @@ index 0a4072605c26..5231fe860553 100644 /// Get or create FuncAnalysisState. static FuncAnalysisState & -@@ -247,6 +247,15 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { - SymbolTable::lookupNearestSymbolFrom(callOp, sym)); +@@ -88,15 +88,19 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { + + /// Return the unique ReturnOp that terminates `funcOp`. + /// Return nullptr if there is no such unique ReturnOp. +-static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { +- func::ReturnOp returnOp; +- for (Block &b : funcOp.getBody()) { +- if (auto candidateOp = dyn_cast(b.getTerminator())) { ++/// Return `funcOp` it self if there is no ReturnOp. ++static Operation* getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { ++ Operation *returnOp = nullptr; ++ for (Block &b : funcOp.getFunctionBody()) { ++ auto candidateOp = b.getTerminator(); ++ if (candidateOp && candidateOp->hasTrait()) { + if (returnOp) + return nullptr; + returnOp = candidateOp; + } + } ++ if (!returnOp) ++ return funcOp; + return returnOp; } +@@ -126,16 +130,15 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, + /// Store function BlockArguments that are equivalent to/aliasing a returned + /// value in FuncAnalysisState. + static LogicalResult +-aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, ++aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, + FuncAnalysisState &funcState) { +- if (funcOp.getBody().empty()) { ++ if (funcOp.getFunctionBody().empty()) { + // No function body available. Conservatively assume that every tensor + // return value may alias with any tensor bbArg. 
+- FunctionType type = funcOp.getFunctionType(); +- for (const auto &inputIt : llvm::enumerate(type.getInputs())) { ++ for (const auto &inputIt : llvm::enumerate(funcOp.getArgumentTypes())) { + if (!isa(inputIt.value())) + continue; +- for (const auto &resultIt : llvm::enumerate(type.getResults())) { ++ for (const auto &resultIt : llvm::enumerate(funcOp.getResultTypes())) { + if (!isa(resultIt.value())) + continue; + int64_t returnIdx = resultIt.index(); +@@ -147,7 +150,10 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, + } + + // Support only single return-terminated block in the function. +- func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); ++ // If funcOp has no returnOp, skip the following analysis. ++ Operation *returnOp = getAssumedUniqueReturnOp(funcOp); ++ if (returnOp == funcOp) ++ return success(); + assert(returnOp && "expected func with single return op"); + + for (OpOperand &returnVal : returnOp->getOpOperands()) +@@ -168,7 +174,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, + return success(); + } + +-static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, ++static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, bool isRead, + bool isWritten) { + OpBuilder b(funcOp.getContext()); + Attribute accessType; +@@ -189,12 +195,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, + /// function with unknown ops, we conservatively assume that such ops bufferize + /// to a read + write. + static LogicalResult +-funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, ++funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, + FuncAnalysisState &funcState) { +- for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; ++ for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; + ++idx) { + // Skip non-tensor arguments. +- if (!isa(funcOp.getFunctionType().getInput(idx))) ++ if (!isa(funcOp.getArgumentTypes()[idx])) + continue; + bool isRead; + bool isWritten; +@@ -204,7 +210,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, + StringRef str = accessAttr.getValue(); + isRead = str == "read" || str == "read-write"; + isWritten = str == "write" || str == "read-write"; +- } else if (funcOp.getBody().empty()) { ++ } else if (funcOp.getFunctionBody().empty()) { + // If the function has no body, conservatively assume that all args are + // read + written. + isRead = true; +@@ -230,20 +236,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, + + /// Remove bufferization attributes on FuncOp arguments. + static void removeBufferizationAttributes(BlockArgument bbArg) { +- auto funcOp = cast(bbArg.getOwner()->getParentOp()); ++ auto funcOp = cast(bbArg.getOwner()->getParentOp()); + funcOp.removeArgAttr(bbArg.getArgNumber(), + BufferizationDialect::kBufferLayoutAttrName); + funcOp.removeArgAttr(bbArg.getArgNumber(), + BufferizationDialect::kWritableAttrName); + } + +-/// Return the func::FuncOp called by `callOp`. 
+-static func::FuncOp getCalledFunction(func::CallOp callOp) { +static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { -+ SymbolRefAttr sym = -+ llvm::dyn_cast_if_present(callOp.getCallableForCallee()); -+ if (!sym) -+ return nullptr; + SymbolRefAttr sym = + llvm::dyn_cast_if_present(callOp.getCallableForCallee()); + if (!sym) + return nullptr; +- return dyn_cast_or_null( + return dyn_cast_or_null( -+ SymbolTable::lookupNearestSymbolFrom(callOp, sym)); -+} -+ - /// Gather equivalence info of CallOps. + SymbolTable::lookupNearestSymbolFrom(callOp, sym)); + } + +@@ -251,12 +256,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. -@@ -277,10 +286,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, + // TODO: This does not handle cyclic function call graphs etc. +-static void equivalenceAnalysis(func::FuncOp funcOp, ++static void equivalenceAnalysis(FunctionOpInterface funcOp, + OneShotAnalysisState &state, + FuncAnalysisState &funcState) { +- funcOp->walk([&](func::CallOp callOp) { +- func::FuncOp calledFunction = getCalledFunction(callOp); +- assert(calledFunction && "could not retrieved called func::FuncOp"); ++ funcOp->walk([&](CallOpInterface callOp) { ++ FunctionOpInterface calledFunction = getCalledFunction(callOp); ++ assert(calledFunction && "could not retrieved called FunctionOpInterface"); + + // No equivalence info available for the called function. + if (!funcState.equivalentFuncArgs.count(calledFunction)) +@@ -267,7 +272,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, + int64_t bbargIdx = it.second; + if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) + continue; +- Value returnVal = callOp.getResult(returnIdx); ++ Value returnVal = callOp->getResult(returnIdx); + Value argVal = callOp->getOperand(bbargIdx); + state.unionEquivalenceClasses(returnVal, argVal); + } +@@ -277,10 +282,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. @@ -41,7 +214,7 @@ index 0a4072605c26..5231fe860553 100644 llvm::IsaPred); } -@@ -291,26 +300,30 @@ static bool hasTensorSignature(func::FuncOp funcOp) { +@@ -291,17 +296,17 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, @@ -58,33 +231,56 @@ index 0a4072605c26..5231fe860553 100644 - if (!funcOp.getBody().empty()) { - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); - if (!returnOp) -- return funcOp->emitError() -- << "cannot bufferize a FuncOp with tensors and " -- "without a unique ReturnOp"; + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { -+ // Only handle ReturnOp if funcOp is exactly the FuncOp type. 
-+ if(isa(funcOp)) { -+ FuncOp funcOpCasted = cast(funcOp); -+ if (!funcOpCasted.getBody().empty()) { -+ func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOpCasted); -+ if (!returnOp) -+ return funcOp->emitError() -+ << "cannot bufferize a FuncOp with tensors and " -+ "without a unique ReturnOp"; -+ } - } ++ if (!funcOp.getFunctionBody().empty()) { ++ Operation *returnOp = getAssumedUniqueReturnOp(funcOp); ++ if (!returnOp && returnOp != funcOp) + return funcOp->emitError() + << "cannot bufferize a FuncOp with tensors and " + "without a unique ReturnOp"; +@@ -309,9 +314,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](func::CallOp callOp) -> WalkResult { - func::FuncOp calledFunction = getCalledFunction(callOp); +- assert(calledFunction && "could not retrieved called func::FuncOp"); + return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + FunctionOpInterface calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called func::FuncOp"); ++ assert(calledFunction && "could not retrieved called FunctionOpInterface"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. -@@ -379,7 +392,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, + if (!hasTensorSignature(calledFunction)) +@@ -349,11 +354,15 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, + /// most generic layout map as function return types. After bufferizing the + /// entire function body, a more concise memref type can potentially be used for + /// the return type of the function. +-static void foldMemRefCasts(func::FuncOp funcOp) { +- if (funcOp.getBody().empty()) ++static void foldMemRefCasts(FunctionOpInterface funcOp) { ++ if (funcOp.getFunctionBody().empty()) ++ return; ++ ++ Operation *returnOp = getAssumedUniqueReturnOp(funcOp); ++ ++ if (!returnOp || returnOp == funcOp) + return; + +- func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + SmallVector resultTypes; + + for (OpOperand &operand : returnOp->getOpOperands()) { +@@ -366,7 +375,7 @@ static void foldMemRefCasts(func::FuncOp funcOp) { + } + + auto newFuncType = FunctionType::get( +- funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); ++ funcOp.getContext(), funcOp.getArgumentTypes(), resultTypes); + funcOp.setType(newFuncType); + } + +@@ -379,7 +388,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. @@ -93,49 +289,26 @@ index 0a4072605c26..5231fe860553 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -388,27 +401,33 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -388,7 +397,8 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (func::FuncOp funcOp : orderedFuncOps) { -- if (!state.getOptions().isOpAllowed(funcOp)) + for (FunctionOpInterface funcOp : orderedFuncOps) { + -+ // The following analysis is specific to the FuncOp type. -+ if(!isa(funcOp)) -+ continue; -+ FuncOp funcOpCasted = cast(funcOp); -+ -+ if (!state.getOptions().isOpAllowed(funcOpCasted)) + if (!state.getOptions().isOpAllowed(funcOp)) continue; - // Now analyzing function. 
-- funcState.startFunctionAnalysis(funcOp); -+ funcState.startFunctionAnalysis(funcOpCasted); - - // Gather equivalence info for CallOps. -- equivalenceAnalysis(funcOp, state, funcState); -+ equivalenceAnalysis(funcOpCasted, state, funcState); +@@ -416,7 +426,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, - // Analyze funcOp. -- if (failed(analyzeOp(funcOp, state, statistics))) -+ if (failed(analyzeOp(funcOpCasted, state, statistics))) - return failure(); - - // Run some extra function analyses. -- if (failed(aliasingFuncOpBBArgsAnalysis(funcOp, state, funcState)) || -- failed(funcOpBbArgReadWriteAnalysis(funcOp, state, funcState))) -+ if (failed(aliasingFuncOpBBArgsAnalysis(funcOpCasted, state, funcState)) || -+ failed(funcOpBbArgReadWriteAnalysis(funcOpCasted, state, funcState))) - return failure(); - - // Mark op as fully analyzed. -- funcState.analyzedFuncOps[funcOp] = FuncOpAnalysisState::Analyzed; -+ funcState.analyzedFuncOps[funcOpCasted] = FuncOpAnalysisState::Analyzed; - } - - return success(); -@@ -430,7 +449,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( + void mlir::bufferization::removeBufferizationAttributesInModule( + ModuleOp moduleOp) { +- moduleOp.walk([&](func::FuncOp op) { ++ moduleOp.walk([&](FunctionOpInterface op) { + for (BlockArgument bbArg : op.getArguments()) + removeBufferizationAttributes(bbArg); + }); +@@ -430,7 +440,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. @@ -144,7 +317,7 @@ index 0a4072605c26..5231fe860553 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -439,11 +458,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -439,11 +449,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. @@ -158,14 +331,28 @@ index 0a4072605c26..5231fe860553 100644 // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; -@@ -456,8 +475,8 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( - } - - // Change buffer return types to more precise layout maps. -- if (options.inferFunctionResultLayout) -- foldMemRefCasts(funcOp); -+ if (options.inferFunctionResultLayout && isa(funcOp)) -+ foldMemRefCasts(cast(funcOp)); - } - +@@ -463,7 +473,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. + for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { + // Functions were already bufferized. +- if (isa(&op)) ++ if (isa(&op)) + continue; + if (failed(bufferizeOp(&op, options, statistics))) + return failure(); +@@ -490,12 +500,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( + // FuncOps whose names are specified in options.noAnalysisFuncFilter will + // not be analyzed. Ops in these FuncOps will not be analyzed as well. 
+ OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { +- auto func = dyn_cast(op); ++ auto func = dyn_cast(op); + if (!func) +- func = op->getParentOfType(); ++ func = op->getParentOfType(); + if (func) + return llvm::is_contained(options.noAnalysisFuncFilter, +- func.getSymName()); ++ func.getName()); + return false; + }; + OneShotBufferizationOptions updatedOptions(options); From c6d834cfca547087e5661d13e323039eea208948 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 12:17:43 -0400 Subject: [PATCH 064/183] Cleanup --- mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index cc9b403139..c4a04ebae1 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -204,10 +204,6 @@ struct BackpropOpInterface rewriter.create(loc, cotangent, resShadow); } - - llvm::outs() << "======================\n"; - llvm::outs() << scalarReturnTypes; - llvm::outs() << "======================\n"; DenseIntElementsAttr diffArgIndicesAttr = backpropOp.getDiffArgIndices().value_or(nullptr); auto bufferizedBackpropOp = rewriter.create( loc, TypeRange{}, scalarReturnTypes, backpropOp.getCalleeAttr(), bufferArgs, argShadows, From f8a1a8ff5cebf98d03535f584c964967c6c9ec8f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 12:44:07 -0400 Subject: [PATCH 065/183] Include convert-elementwise-to-linalg back to bufferization pass --- frontend/catalyst/compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 3333eb4e9b..669a6aa2a6 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -194,6 +194,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "BufferizationPass", [ "eliminate-empty-tensors", + "convert-elementwise-to-linalg", "one-shot-bufferize{bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map}", # "func.func(buffer-hoisting)", # "func.func(buffer-loop-hoisting)", From 9a1a79121f19cbcc9adb907deb350e235d1db5ba Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 13:07:06 -0400 Subject: [PATCH 066/183] Add ForwardOp Bufferization --- .../BufferizableOpInterfaceImpl.cpp | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index c4a04ebae1..6007407420 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -231,6 +231,128 @@ struct BackpropOpInterface } }; +struct ForwardOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return true; + } + + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions 
&options) const + { + auto forwardOp = cast(op); + + auto argc = forwardOp.getArgc(); + auto resc = forwardOp.getResc(); + SmallVector inputs; + SmallVector differentials; + SmallVector outputs; + SmallVector cotangents; + + Block *block; + rewriter.modifyOpInPlace(op, [&] { block = forwardOp.addEntryBlock(); }); + + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(block); + auto params = forwardOp.getArguments(); + + for (size_t i = 0; i < argc * 2; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? differentials.push_back(val) : inputs.push_back(val); + } + + auto upperLimit = (argc * 2) + (resc * 2); + for (size_t i = argc * 2; i < upperLimit; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? cotangents.push_back(val) : outputs.push_back(val); + } + + auto implAttr = forwardOp.getImplementationAttr(); + auto impl = forwardOp.getImplementation(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); + auto implResTy = implOp.getResultTypes(); + Location loc = forwardOp.getLoc(); + + SmallVector tensorInputs; + for (auto input : inputs) { + Value tensorIn = rewriter.create(loc, input); + tensorInputs.push_back(tensorIn); + } + + auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); + SmallVector tensorOutputs(callOp.getResults()); + + for (auto [memrefOutput, tensorOutput] : llvm::zip(outputs, tensorOutputs)) { + Value castVal = rewriter.create(loc, memrefOutput.getType(), + tensorOutput); + rewriter.create(loc, castVal, memrefOutput); + } + + auto tapeCount = forwardOp.getTape(); + SmallVector tapeOutputs; + tapeOutputs.insert(tapeOutputs.begin(), tensorOutputs.end() - tapeCount, + tensorOutputs.end()); + + SmallVector tapeMemrefOutputs; + for (auto [tapeTensorOutput, memrefTapeOutput] : + llvm::zip(tapeOutputs, forwardOp.getResultTypes())) { + Value castVal = + rewriter.create(loc, memrefTapeOutput, tapeTensorOutput); + tapeMemrefOutputs.push_back(castVal); + } + + auto F = rewriter.getIntegerAttr(rewriter.getI1Type(), 0); + bufferization::replaceOpWithNewBufferizedOp(rewriter, op, tapeMemrefOutputs, F); + + return success(); + } +}; + +struct ReverseOpInterface + : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return false; + } + + bufferization::AliasingValueList + getAliasingValues(Operation *op, OpOperand &opOperand, + const bufferization::AnalysisState &state) const + { + return {}; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const bufferization::BufferizationOptions &options) const + { + return success(); + } +}; + } // namespace void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) From 3aead9cfe8ea7c233fb4b43354cad1e04bbd9df6 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 13:10:57 -0400 Subject: [PATCH 067/183] Add reverseOp Bufferization --- .../BufferizableOpInterfaceImpl.cpp | 67 ++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 6007407420..be6a76d9a1 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ 
b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -336,7 +336,7 @@ struct ReverseOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; } bufferization::AliasingValueList @@ -349,6 +349,71 @@ struct ReverseOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { + auto reverseOp = cast(op); + + auto argc = reverseOp.getArgc(); + auto resc = reverseOp.getResc(); + SmallVector inputs; + SmallVector differentials; + SmallVector outputs; + SmallVector cotangents; + SmallVector tapeElements; + + Block *block; + rewriter.modifyOpInPlace(op, [&] { block = reverseOp.addEntryBlock(); }); + + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(block); + auto params = reverseOp.getArguments(); + + for (size_t i = 0; i < argc * 2; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? differentials.push_back(val) : inputs.push_back(val); + } + + auto upperLimit = (argc * 2) + (resc * 2); + for (size_t i = argc * 2; i < upperLimit; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? cotangents.push_back(val) : outputs.push_back(val); + } + + auto tapeCount = reverseOp.getTape(); + auto uppestLimit = upperLimit + tapeCount; + for (size_t i = upperLimit; i < uppestLimit; i++) { + tapeElements.push_back(params[i]); + } + + auto implAttr = reverseOp.getImplementationAttr(); + auto impl = reverseOp.getImplementation(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(reverseOp, implAttr); + auto implResTy = implOp.getResultTypes(); + Location loc = reverseOp.getLoc(); + + SmallVector tensorInputs; + for (auto tapeElement : tapeElements) { + Value tensorIn = rewriter.create(loc, tapeElement); + tensorInputs.push_back(tensorIn); + } + + for (auto cotangent : cotangents) { + Value tensorIn = rewriter.create(loc, cotangent); + tensorInputs.push_back(tensorIn); + } + + auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); + SmallVector tensorOutputs(callOp.getResults()); + + for (auto [differential, tensorOutput] : llvm::zip(differentials, tensorOutputs)) { + Value castVal = rewriter.create(loc, differential.getType(), + tensorOutput); + rewriter.create(loc, castVal, differential); + } + + auto T = rewriter.getIntegerAttr(rewriter.getI1Type(), 1); + rewriter.create(loc, ValueRange{}, T); + return success(); } }; From 418f12248515ddb0abdd4232f90a944cd0100120 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 6 Sep 2024 13:14:18 -0400 Subject: [PATCH 068/183] Correct bufferization interface registration --- mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 2 ++ mlir/lib/Quantum/IR/QuantumDialect.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index be6a76d9a1..8f472ca399 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -425,5 +425,7 @@ void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRe registry.addExtension(+[](MLIRContext *ctx, GradientDialect *dialect) { AdjointOp::attachInterface(*ctx); BackpropOp::attachInterface(*ctx); + ForwardOp::attachInterface(*ctx); + ReverseOp::attachInterface(*ctx); }); } \ No newline at end of file diff --git 
a/mlir/lib/Quantum/IR/QuantumDialect.cpp b/mlir/lib/Quantum/IR/QuantumDialect.cpp index 385f4e0ae5..c8c3788efd 100644 --- a/mlir/lib/Quantum/IR/QuantumDialect.cpp +++ b/mlir/lib/Quantum/IR/QuantumDialect.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/IR/DialectImplementation.h" // needed for generated type parser #include "llvm/ADT/TypeSwitch.h" // needed for generated type parser @@ -43,6 +44,9 @@ void QuantumDialect::initialize() #define GET_OP_LIST #include "Quantum/IR/QuantumOps.cpp.inc" >(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// From 576ebe635f622e983af4da7825f347861155431c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 9 Sep 2024 12:23:18 -0400 Subject: [PATCH 069/183] Rebuild MemRefType without memory layout for BackpropOp --- frontend/catalyst/compiler.py | 2 +- mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 669a6aa2a6..6f1ac8933f 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -195,7 +195,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt [ "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map}", + "one-shot-bufferize{bufferize-function-boundaries}", # "func.func(buffer-hoisting)", # "func.func(buffer-loop-hoisting)", # "buffer-results-to-out-params", diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 8f472ca399..e7605ce107 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -23,7 +23,9 @@ namespace { Value generateAllocation(OpBuilder &builder, Location loc, Value reference) { - auto memrefType = cast(reference.getType()); + auto origMemrefType = cast(reference.getType()); + // Rebuild MemRefType without memory layout. + auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); // Get dynamic dimension sizes from the provided reference value if necessary. 
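// (For example, a sketch: with a reference of type memref<?x3xf64>, the size of dim 0 below is read off `reference` via memref.dim so the new allocation matches it.)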
SmallVector dynamicDims; if (!memrefType.hasStaticShape()) { From 3c4056cb65c2000459d9c61e8467e325d5607faa Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 9 Sep 2024 13:35:02 -0400 Subject: [PATCH 070/183] Add back convert-arraylist-to-memref --- frontend/catalyst/compiler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 6f1ac8933f..630bc5c698 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -196,6 +196,10 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "eliminate-empty-tensors", "convert-elementwise-to-linalg", "one-shot-bufferize{bufferize-function-boundaries}", + "convert-arraylist-to-memref", + "convert-bufferization-to-memref", + "canonicalize", + "cp-global-memref", # "func.func(buffer-hoisting)", # "func.func(buffer-loop-hoisting)", # "buffer-results-to-out-params", From 68d6233483563545b1237c3eacdfeffee6ecd02f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 9 Sep 2024 15:32:29 -0400 Subject: [PATCH 071/183] Include scf-for-preprocessing from LLVM PR#87594 --- mlir/patches/scf-for-preprocessing.patch | 128 +++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 mlir/patches/scf-for-preprocessing.patch diff --git a/mlir/patches/scf-for-preprocessing.patch b/mlir/patches/scf-for-preprocessing.patch new file mode 100644 index 0000000000..769556de62 --- /dev/null +++ b/mlir/patches/scf-for-preprocessing.patch @@ -0,0 +1,128 @@ +diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h +index 90b315e83a8cfd..6107219ea94ae1 100644 +--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h ++++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h +@@ -23,6 +23,9 @@ namespace mlir { + /// Creates a pass that bufferizes the SCF dialect. + std::unique_ptr createSCFBufferizePass(); + ++/// Creates a pass that preprocesses SCF loop before One-Shot Bufferize. ++std::unique_ptr createSCFLoopBufferizationPreprocessingPass(); ++ + /// Creates a pass that specializes for loop for unrolling and + /// vectorization. + std::unique_ptr createForLoopSpecializationPass(); +diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td +index 350611ad86873d..94d3e51a1c9044 100644 +--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td ++++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td +@@ -18,6 +18,27 @@ def SCFBufferize : Pass<"scf-bufferize"> { + "memref::MemRefDialect"]; + } + ++def SCFLoopBufferizationPreprocessing ++ : Pass<"scf-loop-bufferization-preprocessing"> { ++ let summary = "Preprocess loops before One-Shot Bufferize"; ++ ++ let description = [{ ++ Preprocess `scf.for` loops before running One-Shot Bufferize to support ++ loops where a yielded tensor is not equivalent to the respective iter_arg. ++ Such IR is currently not supported by One-Shot Bufferize. ++ ++ This pass inserts a `bufferization.materialize_in_destination` op for every ++ yielded tensor, such that the yielded value is guaranteed to materialize in ++ the future buffer of the iter_arg; this is done by copying the tensor ++ contents into the iter_arg buffer. Such memcpys are a no-op in case the ++ tensor contents already materialize in the iter_arg buffer. 
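++
++    For illustration, a sketch (shapes borrowed from the test added below):
++    a loop that yields a freshly inserted-into tensor,
++
++      %r = scf.for %i = %lb to %ub step %step iter_args(%tA = %A) -> (tensor<10xf32>) {
++        %0 = tensor.insert %f into %tA[%i] : tensor<10xf32>
++        scf.yield %0 : tensor<10xf32>
++      }
++
++    is rewritten so that the yielded value is first materialized in the
++    iter_arg's future buffer:
++
++      %m = bufferization.materialize_in_destination %0 in %tA
++          : (tensor<10xf32>, tensor<10xf32>) -> tensor<10xf32>
++      scf.yield %m : tensor<10xf32>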
++ }]; ++ ++ let constructor = "mlir::createSCFLoopBufferizationPreprocessingPass()"; ++ let dependentDialects = ["bufferization::BufferizationDialect", ++ "scf::SCFDialect"]; ++} ++ + // Note: Making these canonicalization patterns would require a dependency + // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa. + def SCFForLoopCanonicalization +diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp +index 21c618ab633f60..727c4fc7c6396e 100644 +--- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp ++++ b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp +@@ -17,6 +17,7 @@ + + namespace mlir { + #define GEN_PASS_DEF_SCFBUFFERIZE ++#define GEN_PASS_DEF_SCFLOOPBUFFERIZATIONPREPROCESSING + #include "mlir/Dialect/SCF/Transforms/Passes.h.inc" + } // namespace mlir + +@@ -40,8 +41,40 @@ struct SCFBufferizePass : public impl::SCFBufferizeBase { + return signalPassFailure(); + }; + }; ++ ++struct SCFLoopBufferizationPreprocessingPass ++ : public impl::SCFLoopBufferizationPreprocessingBase< ++ SCFLoopBufferizationPreprocessingPass> { ++ void runOnOperation() override { ++ OpBuilder builder(getOperation()->getContext()); ++ getOperation()->walk([&](scf::YieldOp yieldOp) { ++ builder.setInsertionPoint(yieldOp); ++ // TODO: Support scf.while. ++ auto forOp = dyn_cast(yieldOp->getParentOp()); ++ if (!forOp) ++ return WalkResult::skip(); ++ for (OpOperand &operand : yieldOp->getOpOperands()) { ++ auto tensorType = dyn_cast(operand.get().getType()); ++ if (!tensorType) ++ continue; ++ auto bbArg = forOp.getRegionIterArgs()[operand.getOperandNumber()]; ++ Value materialized = ++ builder ++ .create( ++ yieldOp.getLoc(), tensorType, operand.get(), bbArg) ++ .getResult(); ++ operand.set(materialized); ++ } ++ return WalkResult::advance(); ++ }); ++ } ++}; + } // namespace + + std::unique_ptr mlir::createSCFBufferizePass() { + return std::make_unique(); + } ++ ++std::unique_ptr mlir::createSCFLoopBufferizationPreprocessingPass() { ++ return std::make_unique(); ++} +diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir +new file mode 100644 +index 00000000000000..17661178245088 +--- /dev/null ++++ b/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir +@@ -0,0 +1,23 @@ ++// RUN: mlir-opt %s -scf-loop-bufferization-preprocessing -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" -canonicalize | FileCheck %s ++ ++// CHECK-LABEL: func @conflict_in_loop( ++// CHECK-SAME: %[[A:.*]]: memref<10xf32> ++func.func @conflict_in_loop(%A: tensor<10xf32>, %f: f32, %idx: index, %lb: index, %ub: index, %step: index) -> f32 { ++ // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { ++ %r = scf.for %i = %lb to %ub step %step iter_args(%tA = %A) -> (tensor<10xf32>) { ++ // CHECK: %[[alloc:.*]] = memref.alloc() ++ // CHECK: memref.copy %[[A]], %[[alloc]] ++ // CHECK: memref.store %{{.*}}, %[[alloc]] ++ %0 = tensor.insert %f into %tA[%i] : tensor<10xf32> ++ // CHECK: %[[read:.*]] = memref.load %[[A]] ++ %read = tensor.extract %tA[%idx] : tensor<10xf32> ++ // CHECK: vector.print %[[read]] ++ vector.print %read : f32 ++ // CHECK: memref.copy %[[alloc]], %[[A]] ++ scf.yield %0 : tensor<10xf32> ++ } ++ ++ // CHECK: memref.load %[[A]] ++ %f0 = tensor.extract %r[%step] : tensor<10xf32> ++ return %f0 : f32 ++} From 31b1dc91fb6b688e9250b4cc2052f6856258b11e Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 9 Sep 2024 
16:37:39 -0400 Subject: [PATCH 072/183] Add scf-loop-bufferization-preprocessing into bufferization pass --- frontend/catalyst/compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 630bc5c698..ed59e4646c 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -195,6 +195,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt [ "eliminate-empty-tensors", "convert-elementwise-to-linalg", + "scf-loop-bufferization-preprocessing", "one-shot-bufferize{bufferize-function-boundaries}", "convert-arraylist-to-memref", "convert-bufferization-to-memref", From 81f0a868a250173a75965c6551ede759bb45da0d Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 9 Sep 2024 16:38:11 -0400 Subject: [PATCH 073/183] Make llvm patches apply to directory --- mlir/Makefile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mlir/Makefile b/mlir/Makefile index c553d3274b..4c8b9ef4f3 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -12,8 +12,9 @@ ENZYME_BUILD_DIR?=$(MK_DIR)/Enzyme/build RT_BUILD_DIR?=$(MK_DIR)/../runtime/build ENABLE_ASAN?=OFF BUILD_TYPE?=Release -LLVM_TARGET_FILE=$(MK_DIR)/llvm-project/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp -LLVM_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch +LLVM_ROOT=$(MK_DIR)/llvm-project +LLVM_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch +LLVM_SCF_PATCH_FILE=$(MK_DIR)/patches/scf-for-preprocessing.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -56,8 +57,11 @@ all: llvm mhlo enzyme dialects .PHONY: llvm llvm: @echo "build LLVM and MLIR enabling Python bindings" - @if patch --dry-run -p1 -N $(LLVM_TARGET_FILE) $(LLVM_PATCH_FILE) > /dev/null 2>&1; then \ - patch -p1 $(LLVM_TARGET_FILE) $(LLVM_PATCH_FILE); \ + @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); \ + fi + @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_SCF_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_SCF_PATCH_FILE); \ fi cmake -G Ninja -S llvm-project/llvm -B $(LLVM_BUILD_DIR) \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ From ee9b78da9519b1efa2912e9b66595532694a384b Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 10 Sep 2024 15:13:26 -0400 Subject: [PATCH 074/183] Remove functionOpInterface workaround for no ReturnLike from the patch --- mlir/patches/moduleOp-bufferization.patch | 622 ++++++++++++++++++++-- 1 file changed, 580 insertions(+), 42 deletions(-) diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch index 51cc79536e..25384f99fb 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/moduleOp-bufferization.patch @@ -47,7 +47,7 @@ index 053ea7935260..fed1c49d22be 100644 auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp -index 0a4072605c26..bd054ac4e7b8 100644 +index 0a4072605c26..ce90d907b4ca 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ 
-75,7 +75,7 @@ using namespace mlir::bufferization; @@ -59,7 +59,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 /// Get or create FuncAnalysisState. static FuncAnalysisState & -@@ -88,15 +88,19 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { +@@ -88,10 +88,11 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { /// Return the unique ReturnOp that terminates `funcOp`. /// Return nullptr if there is no such unique ReturnOp. @@ -67,7 +67,6 @@ index 0a4072605c26..bd054ac4e7b8 100644 - func::ReturnOp returnOp; - for (Block &b : funcOp.getBody()) { - if (auto candidateOp = dyn_cast(b.getTerminator())) { -+/// Return `funcOp` it self if there is no ReturnOp. +static Operation* getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { + Operation *returnOp = nullptr; + for (Block &b : funcOp.getFunctionBody()) { @@ -76,14 +75,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 if (returnOp) return nullptr; returnOp = candidateOp; - } - } -+ if (!returnOp) -+ return funcOp; - return returnOp; - } - -@@ -126,16 +130,15 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, +@@ -126,16 +127,15 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult @@ -104,19 +96,16 @@ index 0a4072605c26..bd054ac4e7b8 100644 if (!isa(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); -@@ -147,7 +150,10 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -147,7 +147,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, } // Support only single return-terminated block in the function. - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); -+ // If funcOp has no returnOp, skip the following analysis. + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); -+ if (returnOp == funcOp) -+ return success(); assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) -@@ -168,7 +174,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -168,7 +168,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, return success(); } @@ -125,7 +114,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; -@@ -189,12 +195,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, +@@ -189,12 +189,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. static LogicalResult @@ -141,7 +130,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 continue; bool isRead; bool isWritten; -@@ -204,7 +210,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -204,7 +204,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; @@ -150,7 +139,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 // If the function has no body, conservatively assume that all args are // read + written. 
isRead = true; -@@ -230,20 +236,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -230,20 +230,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { @@ -174,7 +163,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } -@@ -251,12 +256,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { +@@ -251,12 +250,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc. @@ -191,7 +180,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) -@@ -267,7 +272,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, +@@ -267,7 +266,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; @@ -200,7 +189,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } -@@ -277,10 +282,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, +@@ -277,10 +276,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. @@ -214,7 +203,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 llvm::IsaPred); } -@@ -291,17 +296,17 @@ static bool hasTensorSignature(func::FuncOp funcOp) { +@@ -291,16 +290,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, @@ -230,16 +219,14 @@ index 0a4072605c26..bd054ac4e7b8 100644 - WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { - if (!funcOp.getBody().empty()) { - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); -- if (!returnOp) + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { + if (!funcOp.getFunctionBody().empty()) { + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); -+ if (!returnOp && returnOp != funcOp) + if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " - "without a unique ReturnOp"; -@@ -309,9 +314,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, +@@ -309,9 +308,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; @@ -252,7 +239,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. if (!hasTensorSignature(calledFunction)) -@@ -349,11 +354,15 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, +@@ -349,11 +348,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. 
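/// For example (an illustrative sketch): if the unique return yields
/// %c = memref.cast %m : memref<5xf32> to memref<5xf32, strided<[?], offset: ?>>,
/// the cast can be folded into the return and the function's result type
/// tightened to memref<5xf32>.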
@@ -260,18 +247,14 @@ index 0a4072605c26..bd054ac4e7b8 100644 - if (funcOp.getBody().empty()) +static void foldMemRefCasts(FunctionOpInterface funcOp) { + if (funcOp.getFunctionBody().empty()) -+ return; -+ -+ Operation *returnOp = getAssumedUniqueReturnOp(funcOp); -+ -+ if (!returnOp || returnOp == funcOp) return; - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); ++ Operation *returnOp = getAssumedUniqueReturnOp(funcOp); SmallVector resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { -@@ -366,7 +375,7 @@ static void foldMemRefCasts(func::FuncOp funcOp) { +@@ -366,7 +365,7 @@ static void foldMemRefCasts(func::FuncOp funcOp) { } auto newFuncType = FunctionType::get( @@ -280,7 +263,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 funcOp.setType(newFuncType); } -@@ -379,7 +388,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -379,7 +378,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. @@ -289,17 +272,16 @@ index 0a4072605c26..bd054ac4e7b8 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -388,7 +397,8 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -388,7 +387,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { -+ if (!state.getOptions().isOpAllowed(funcOp)) continue; -@@ -416,7 +426,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -416,7 +415,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { @@ -308,7 +290,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); -@@ -430,7 +440,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -430,7 +429,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. @@ -317,7 +299,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -439,11 +449,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -439,11 +438,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. @@ -331,7 +313,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; -@@ -463,7 +473,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -463,7 +462,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. @@ -340,7 +322,7 @@ index 0a4072605c26..bd054ac4e7b8 100644 continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); -@@ -490,12 +500,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( +@@ -490,12 +489,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well. 
OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { @@ -356,3 +338,559 @@ index 0a4072605c26..bd054ac4e7b8 100644 return false; }; OneShotBufferizationOptions updatedOptions(options); +diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +index 3c50a9e72d9d..588aa8a85a84 100644 +--- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir ++++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +@@ -1,4 +1,4 @@ +-// RUN: mlir-opt --transform-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s ++// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s -split-input-file -verify-diagnostics | FileCheck %s + + // Test One-Shot Bufferize. + +@@ -12,19 +12,21 @@ module attributes {transform.with_named_sequence} { + + // CHECK-LABEL: func @test_function( + // CHECK-SAME: %[[A:.*]]: tensor +-func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { +- %c0 = arith.constant 0 : index ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { ++ %c0 = arith.constant 0 : index + +- // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] +- // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] +- // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) +- // CHECK: memref.copy %[[A_memref]], %[[alloc]] +- // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] +- // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] +- %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor ++ // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] ++ // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] ++ // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) ++ // CHECK: memref.copy %[[A_memref]], %[[alloc]] ++ // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] ++ // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] ++ %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + +- // CHECK: return %[[res_tensor]] +- return %0 : tensor ++ // CHECK: return %[[res_tensor]] ++ return %0 : tensor ++ } + } + + // ----- +@@ -42,19 +44,21 @@ module attributes {transform.with_named_sequence} { + // CHECK-LABEL: func @test_function( + // CHECK-SAME: %[[A:.*]]: tensor + // CHECK-NOT: memref.copy +-func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { +- %c0 = arith.constant 0 : index ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { ++ %c0 = arith.constant 0 : index + +- // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] +- // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] +- // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) +- // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] +- // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] +- // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] +- %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor ++ // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] ++ // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] ++ // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) ++ // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] ++ // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] ++ // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] ++ %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + +- // CHECK: return 
%[[res_tensor]] +- return %0 : tensor ++ // CHECK: return %[[res_tensor]] ++ return %0 : tensor ++ } + } + + // ----- +@@ -72,13 +76,15 @@ module attributes {transform.with_named_sequence} { + + // CHECK-LABEL: func @test_function_analysis( + // CHECK-SAME: %[[A:.*]]: tensor +-func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { +- %c0 = arith.constant 0 : index +- // CHECK: vector.transfer_write +- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} +- // CHECK-SAME: tensor +- %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor +- return %0 : tensor ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { ++ %c0 = arith.constant 0 : index ++ // CHECK: vector.transfer_write ++ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} ++ // CHECK-SAME: tensor ++ %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor ++ return %0 : tensor ++ } + } + + // ----- +@@ -95,10 +101,12 @@ module attributes {transform.with_named_sequence} { + } + } + +-func.func @test_unknown_op_failure() -> (tensor) { +- // expected-error @+1 {{op was not bufferized}} +- %0 = "test.dummy_op"() : () -> (tensor) +- return %0 : tensor ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @test_unknown_op_failure() -> (tensor) { ++ // expected-error @+1 {{op was not bufferized}} ++ %0 = "test.dummy_op"() : () -> (tensor) ++ return %0 : tensor ++ } + } + + // ----- +@@ -111,7 +119,7 @@ module attributes {transform.with_named_sequence} { + } + } + +-module { ++module @payload attributes { transform.target_tag = "payload" } { + // CHECK-LABEL: func @test_function( + // CHECK-SAME: %[[A:.*]]: tensor + func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { +@@ -146,11 +154,13 @@ module attributes {transform.with_named_sequence} { + // CHECK-SAME: %[[A:.*]]: memref<12x9xf32>, + // CHECK-SAME: %[[B:.*]]: memref<9x6xf32>, + // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { +-func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { +- // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) +- %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> +- // CHECK: return %[[C]] : memref<12x6xf32> +- return %D : tensor<12x6xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { ++ // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) ++ %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> ++ // CHECK: return %[[C]] : memref<12x6xf32> ++ return %D : tensor<12x6xf32> ++ } + } + + // ----- +@@ -165,10 +175,12 @@ module attributes {transform.with_named_sequence} { + } + + // Expect `bufferization.empty_tensor_to_alloc_tensor` to replace the tensor.empty. 
+-func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { +- // CHECK: bufferization.alloc_tensor +- %0 = tensor.empty() : tensor<2x2xf32> +- return %0 : tensor<2x2xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { ++ // CHECK: bufferization.alloc_tensor ++ %0 = tensor.empty() : tensor<2x2xf32> ++ return %0 : tensor<2x2xf32> ++ } + } + + // ----- +@@ -185,13 +197,15 @@ module attributes {transform.with_named_sequence} { + // CHECK: tensor.extract_slice + // CHECK: linalg.fill + // CHECK: tensor.insert_slice +-func.func @empty_tensor_elimination( +- %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { +- %0 = tensor.empty() : tensor<5xf32> +- %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> +- %2 = tensor.insert_slice %1 into %t [1][5][1] +- : tensor<5xf32> into tensor<10xf32> +- return %2 : tensor<10xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @empty_tensor_elimination( ++ %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { ++ %0 = tensor.empty() : tensor<5xf32> ++ %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> ++ %2 = tensor.insert_slice %1 into %t [1][5][1] ++ : tensor<5xf32> into tensor<10xf32> ++ return %2 : tensor<10xf32> ++ } + } + + // ----- +@@ -208,12 +222,14 @@ module attributes {transform.with_named_sequence} { + // CHECK: memref.alloca + // CHECK: scf.for + // CHECK: memref.store +-func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { +- scf.for %iv = %lb to %ub step %step { +- %0 = memref.alloca() : memref<5xf32> +- memref.store %f, %0[%pos] : memref<5xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { ++ scf.for %iv = %lb to %ub step %step { ++ %0 = memref.alloca() : memref<5xf32> ++ memref.store %f, %0[%pos] : memref<5xf32> ++ } ++ return + } +- return + } + + // ----- +@@ -231,10 +247,12 @@ module attributes {transform.with_named_sequence} { + + // Expect `bufferization.bufferize_to_allocation` to create an alloc. 
+ // CHECK-LABEL: func.func @empty_to_tensor_alloc() +-func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { +- // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> +- // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> +- // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> +- %0 = bufferization.alloc_tensor() : tensor<2x2xf32> +- return %0 : tensor<2x2xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { ++ // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> ++ // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> ++ // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> ++ %0 = bufferization.alloc_tensor() : tensor<2x2xf32> ++ return %0 : tensor<2x2xf32> ++ } + } +diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir +index c00b47fb936e..3e637a3ec49a 100644 +--- a/mlir/test/Dialect/LLVM/transform-e2e.mlir ++++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir +@@ -1,15 +1,17 @@ +-// RUN: mlir-opt %s --transform-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s ++// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s + + // CHECK-LABEL: llvm.func @matmul_tensors +-func.func @matmul_tensors( +- %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) +- -> tensor<2x6xf32> { +-// CHECK-NOT: linalg +-// CHECK: llvm.intr.fmuladd{{.*}} +- %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) +- outs(%arg2: tensor<2x6xf32>) +- -> tensor<2x6xf32> +- return %0 : tensor<2x6xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @matmul_tensors( ++ %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) ++ -> tensor<2x6xf32> { ++ // CHECK-NOT: linalg ++ // CHECK: llvm.intr.fmuladd{{.*}} ++ %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) ++ outs(%arg2: tensor<2x6xf32>) ++ -> tensor<2x6xf32> ++ return %0 : tensor<2x6xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +index 3f8d2ea06641..9c223737750a 100644 +--- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir ++++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +@@ -1,4 +1,4 @@ +-// RUN: mlir-opt --split-input-file --transform-interpreter %s | FileCheck %s ++// RUN: mlir-opt --split-input-file --transform-interpreter="debug-payload-root-tag=payload" %s | FileCheck %s + + // CHECK-LABEL: func @matmul_divisible + // CHECK: scf.forall +@@ -24,19 +24,21 @@ + // CHECK: scf.forall + // CHECK: vector.transfer_read + // CHECK: vector.transfer_write +-func.func @matmul_divisible(%A: tensor<1024x1024xf32>, +- %B: tensor<1024x1024xf32>, +- %C: tensor<1024x1024xf32>) +- -> tensor<1024x1024xf32> +-{ +- %cst = arith.constant 0.000000e+00 : f32 +- %0 = linalg.fill ins(%cst : f32) +- outs(%C : tensor<1024x1024xf32>) ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @matmul_divisible(%A: tensor<1024x1024xf32>, ++ %B: tensor<1024x1024xf32>, ++ %C: tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> +- %1 = linalg.matmul ins(%A, %B : 
tensor<1024x1024xf32>, tensor<1024x1024xf32>) +- outs(%0 : tensor<1024x1024xf32>) +- -> tensor<1024x1024xf32> +- return %1 : tensor<1024x1024xf32> ++ { ++ %cst = arith.constant 0.000000e+00 : f32 ++ %0 = linalg.fill ins(%cst : f32) ++ outs(%C : tensor<1024x1024xf32>) ++ -> tensor<1024x1024xf32> ++ %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) ++ outs(%0 : tensor<1024x1024xf32>) ++ -> tensor<1024x1024xf32> ++ return %1 : tensor<1024x1024xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +@@ -143,19 +145,21 @@ module attributes {transform.with_named_sequence} { + // CHECK: linalg.matmul + // CHECK: vector.transfer_read + // CHECK: vector.transfer_write ++module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_not_divisible(%A: tensor<1023x1023xf32>, +- %B: tensor<1023x1023xf32>, +- %C: tensor<1023x1023xf32>) +- -> tensor<1023x1023xf32> +-{ +- %cst = arith.constant 0.000000e+00 : f32 +- %0 = linalg.fill ins(%cst : f32) +- outs(%C : tensor<1023x1023xf32>) ++ %B: tensor<1023x1023xf32>, ++ %C: tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> +- %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) +- outs(%0 : tensor<1023x1023xf32>) +- -> tensor<1023x1023xf32> +- return %1 : tensor<1023x1023xf32> ++ { ++ %cst = arith.constant 0.000000e+00 : f32 ++ %0 = linalg.fill ins(%cst : f32) ++ outs(%C : tensor<1023x1023xf32>) ++ -> tensor<1023x1023xf32> ++ %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) ++ outs(%0 : tensor<1023x1023xf32>) ++ -> tensor<1023x1023xf32> ++ return %1 : tensor<1023x1023xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +index f2e9e839b7c4..5e5657980ba1 100644 +--- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir ++++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +@@ -1,5 +1,5 @@ + +-// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s ++// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s + + #map = affine_map<()[s0] -> (-s0 + 12, 7)> + +@@ -7,43 +7,45 @@ + // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, + // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, + // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, +-func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, +- %arg1: tensor<12x25xf32>, +- %arg2: tensor<24x25xf32>, +- %iv0 : index, %iv1 : index, +- %iv2 : index) -> tensor<24x25xf32> { +- %0 = affine.min #map()[%iv2] +- +- // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] +- %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> +- // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] +- %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor +- // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] +- %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> +- +- // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> +- // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] +- // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] +- // CHECK: memref.copy %[[s0]], %[[alloc0_view]] +- +- // CHECK: 
%[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> +- // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] +- // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] +- // CHECK: memref.copy %[[s1]], %[[alloc1_view]] +- +- // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> +- // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] +- // No subview because there is 0 padding +- // CHECK: memref.copy %[[s2]], %[[alloc2]] +- +- // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) +- // Copy back result. +- // CHECK: memref.copy %[[alloc2]], %[[s2]] +- %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> +- +- // insert_slice bufferizes to a no-op. +- %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> +- func.return %5 : tensor<24x25xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, ++ %arg1: tensor<12x25xf32>, ++ %arg2: tensor<24x25xf32>, ++ %iv0 : index, %iv1 : index, ++ %iv2 : index) -> tensor<24x25xf32> { ++ %0 = affine.min #map()[%iv2] ++ ++ // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] ++ %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> ++ // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] ++ %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor ++ // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] ++ %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> ++ ++ // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> ++ // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] ++ // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] ++ // CHECK: memref.copy %[[s0]], %[[alloc0_view]] ++ ++ // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> ++ // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] ++ // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] ++ // CHECK: memref.copy %[[s1]], %[[alloc1_view]] ++ ++ // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> ++ // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] ++ // No subview because there is 0 padding ++ // CHECK: memref.copy %[[s2]], %[[alloc2]] ++ ++ // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) ++ // Copy back result. ++ // CHECK: memref.copy %[[alloc2]], %[[s2]] ++ %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> ++ ++ // insert_slice bufferizes to a no-op. 
++ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> ++ func.return %5 : tensor<24x25xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +@@ -69,40 +71,42 @@ module attributes {transform.with_named_sequence} { + // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, + // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, + // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, +-func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, +- %arg1: tensor<12x25xf32>, +- %arg2: tensor<24x25xf32>, +- %iv0 : index, %iv1 : index, +- %iv2 : index) -> tensor<24x25xf32> { +- %0 = affine.min #map()[%iv2] +- +- // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] +- %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> +- // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] +- %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor +- // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] +- %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> +- +- // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] +- // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> +- // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] +- +- // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] +- // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> +- // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] +- +- // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] +- // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> +- // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] +- +- // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) +- // Copy back result. +- // CHECK: memref.copy %[[alloc2]], %[[s2]] +- %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> +- +- // insert_slice bufferizes to a no-op. 
+- %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> +- func.return %5 : tensor<24x25xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, ++ %arg1: tensor<12x25xf32>, ++ %arg2: tensor<24x25xf32>, ++ %iv0 : index, %iv1 : index, ++ %iv2 : index) -> tensor<24x25xf32> { ++ %0 = affine.min #map()[%iv2] ++ ++ // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] ++ %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> ++ // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] ++ %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor ++ // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] ++ %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> ++ ++ // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] ++ // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> ++ // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] ++ ++ // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] ++ // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> ++ // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] ++ ++ // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] ++ // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> ++ // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] ++ ++ // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) ++ // Copy back result. ++ // CHECK: memref.copy %[[alloc2]], %[[s2]] ++ %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> ++ ++ // insert_slice bufferizes to a no-op. 
++ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> ++ func.return %5 : tensor<24x25xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir +index 75b29e22b4d2..21e615e60438 100644 +--- a/mlir/test/Dialect/Vector/transform-vector.mlir ++++ b/mlir/test/Dialect/Vector/transform-vector.mlir +@@ -1,16 +1,18 @@ +-// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s ++// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s --split-input-file | FileCheck %s + + // CHECK-LABEL: func @matmul_tensors +-func.func @matmul_tensors( +- %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) +- -> tensor<8x32xf32> { +-// CHECK-NOT: linalg +-// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> +-// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> +- %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) +- outs(%arg2: tensor<8x32xf32>) +- -> tensor<8x32xf32> +- return %0 : tensor<8x32xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @matmul_tensors( ++ %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) ++ -> tensor<8x32xf32> { ++ // CHECK-NOT: linalg ++ // CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> ++ // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> ++ %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) ++ outs(%arg2: tensor<8x32xf32>) ++ -> tensor<8x32xf32> ++ return %0 : tensor<8x32xf32> ++ } + } + + module attributes {transform.with_named_sequence} { +@@ -76,11 +78,13 @@ module attributes {transform.with_named_sequence} { + // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} + // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32> + // CHECK-NEXT: return %[[R]] : vector<64x64xf32> +-func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { +- %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> +- %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> +- %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> +- return %result : vector<64x64xf32> ++module @payload attributes { transform.target_tag = "payload" } { ++ func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { ++ %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> ++ %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> ++ %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> ++ return %result : vector<64x64xf32> ++ } + } + + module attributes {transform.with_named_sequence} { From 
b9f0e9cd8b0aeec689ee9a0177c1dc3488513f13 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 10 Sep 2024 15:39:21 -0400 Subject: [PATCH 075/183] Make CopyGlobalMemRef create memref without memory layout and then cast it back --- mlir/lib/Quantum/Transforms/cp_global_buffers.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp index b461dc8a60..227d7977b0 100644 --- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp +++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp @@ -87,7 +87,10 @@ llvm::SmallVector getReturnMemRefs(func::ReturnOp op) */ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) { - auto memrefType = cast(memref.getType()); + auto origMemrefType = cast(memref.getType()); + // Rebuild MemRefType without memory layout. + auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); + llvm::SmallVector dynDims; { llvm::SmallVector dynIndices; @@ -102,8 +105,10 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) } Value newMemRef = rewriter.create(loc, memrefType, dynDims); + // Cast memrefType back to maintain memory layout. + Value castMemRef = rewriter.create(loc, origMemrefType, newMemRef); rewriter.create(loc, memref, newMemRef); - return newMemRef; + return castMemRef; } /** From c522097a1872492c7a4cbb61988f3c42b4645742 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 11 Sep 2024 15:56:23 -0400 Subject: [PATCH 076/183] Use allow-return-allocs-from-loops to avoid scf.for/while errors --- frontend/catalyst/compiler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index ed59e4646c..f0408fac55 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -195,8 +195,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt [ "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "scf-loop-bufferization-preprocessing", - "one-shot-bufferize{bufferize-function-boundaries}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}", "convert-arraylist-to-memref", "convert-bufferization-to-memref", "canonicalize", From 553ed3e5f996ddf34cbe556bc89f42d40df22251 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 11 Sep 2024 20:52:07 -0400 Subject: [PATCH 077/183] Make ForwardOp and ReverseOp compatible with ModuleOpBufferize --- .../BufferizableOpInterfaceImpl.cpp | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index e7605ce107..04e5f92003 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -3,6 +3,7 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Index/IR/IndexOps.h" @@ -234,7 +235,25 @@ struct BackpropOpInterface }; struct ForwardOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { 
+ : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< + ForwardOpInterface, ForwardOp> { + + static bool supportsUnstructuredControlFlow() { return true; } + + bool hasTensorSemantics(Operation *op) const + { + auto isaTensor = llvm::IsaPred; + + // A function has tensor semantics if it has tensor arguments/results. + auto forwardOp = cast(op); + bool hasTensorArg = any_of(forwardOp.getArgumentTypes(), isaTensor); + bool hasTensorResult = any_of(forwardOp.getResultTypes(), isaTensor); + if (hasTensorArg || hasTensorResult) + return true; + + return false; + } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { @@ -328,7 +347,25 @@ struct ForwardOpInterface }; struct ReverseOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< + ReverseOpInterface, ReverseOp> { + + static bool supportsUnstructuredControlFlow() { return true; } + + bool hasTensorSemantics(Operation *op) const + { + auto isaTensor = llvm::IsaPred; + + // A function has tensor semantics if it has tensor arguments/results. + auto reverseOp = cast(op); + bool hasTensorArg = any_of(reverseOp.getArgumentTypes(), isaTensor); + bool hasTensorResult = any_of(reverseOp.getResultTypes(), isaTensor); + if (hasTensorArg || hasTensorResult) + return true; + + return false; + } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { @@ -352,6 +389,9 @@ struct ReverseOpInterface const bufferization::BufferizationOptions &options) const { auto reverseOp = cast(op); + llvm::outs() << "Found reverse!\n"; + llvm::outs() << "Found reverse!\n"; + llvm::outs() << "Found reverse!\n"; auto argc = reverseOp.getArgc(); auto resc = reverseOp.getResc(); From c41879216cf0b9abbd40a2827bd70c3d2d2f531e Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 13:55:05 -0400 Subject: [PATCH 078/183] Make ForwardOp and ReverseOp update signature --- .../BufferizableOpInterfaceImpl.cpp | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 04e5f92003..156db2c599 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -285,8 +285,28 @@ struct ForwardOpInterface SmallVector outputs; SmallVector cotangents; + // Update signature + auto argTys = forwardOp.getArgumentTypes(); + auto retTys = forwardOp.getResultTypes(); + SmallVector emptyRets; + SmallVector args(argTys.begin(), argTys.end()); + args.insert(args.end(), retTys.begin(), retTys.end()); + SmallVector bufferArgs; + for (Type ty : args) { + auto tensorType = dyn_cast(ty); + if (!tensorType) + bufferArgs.push_back(ty); + else + bufferArgs.push_back( + MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + } + auto forwardTy = rewriter.getFunctionType(bufferArgs, emptyRets); + Block *block; - rewriter.modifyOpInPlace(op, [&] { block = forwardOp.addEntryBlock(); }); + rewriter.modifyOpInPlace(op, [&] { + forwardOp.setFunctionType(forwardTy); + block = forwardOp.addEntryBlock(); + }); PatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(block); @@ -316,7 +336,6 @@ struct ForwardOpInterface Value tensorIn = rewriter.create(loc, 
input); tensorInputs.push_back(tensorIn); } - auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); SmallVector tensorOutputs(callOp.getResults()); @@ -340,7 +359,7 @@ struct ForwardOpInterface } auto F = rewriter.getIntegerAttr(rewriter.getI1Type(), 0); - bufferization::replaceOpWithNewBufferizedOp(rewriter, op, tapeMemrefOutputs, F); + rewriter.create(loc, tapeMemrefOutputs, F); return success(); } @@ -389,9 +408,6 @@ struct ReverseOpInterface const bufferization::BufferizationOptions &options) const { auto reverseOp = cast(op); - llvm::outs() << "Found reverse!\n"; - llvm::outs() << "Found reverse!\n"; - llvm::outs() << "Found reverse!\n"; auto argc = reverseOp.getArgc(); auto resc = reverseOp.getResc(); @@ -401,8 +417,28 @@ struct ReverseOpInterface SmallVector cotangents; SmallVector tapeElements; + // Update signature + auto argTys = reverseOp.getArgumentTypes(); + auto retTys = reverseOp.getResultTypes(); + SmallVector emptyRets; + SmallVector args(argTys.begin(), argTys.end()); + args.insert(args.end(), retTys.begin(), retTys.end()); + SmallVector bufferArgs; + for (Type ty : args) { + auto tensorType = dyn_cast(ty); + if (!tensorType) + bufferArgs.push_back(ty); + else + bufferArgs.push_back( + MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + } + auto reverseTy = rewriter.getFunctionType(bufferArgs, emptyRets); + Block *block; - rewriter.modifyOpInPlace(op, [&] { block = reverseOp.addEntryBlock(); }); + rewriter.modifyOpInPlace(op, [&] { + reverseOp.setFunctionType(reverseTy); + block = reverseOp.addEntryBlock(); + }); PatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(block); From 1e5f96c538cd2b2345b2e75693d5c7fc9f181c4f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 15:41:50 -0400 Subject: [PATCH 079/183] Do not bufferize ForwardOp if its implementation is not bufferized --- .../BufferizableOpInterfaceImpl.cpp | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 156db2c599..b945cbfa69 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/Transforms/DialectConversion.h" #include "Gradient/IR/GradientOps.h" @@ -248,6 +249,21 @@ struct ForwardOpInterface auto forwardOp = cast(op); bool hasTensorArg = any_of(forwardOp.getArgumentTypes(), isaTensor); bool hasTensorResult = any_of(forwardOp.getResultTypes(), isaTensor); + + // Implementation must be bufferized. + auto implAttr = forwardOp.getImplementationAttr(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); + auto implArgTy = implOp.getArgumentTypes(); + auto implResTy = implOp.getResultTypes(); + for (auto ty: implArgTy) { + if (!isa(ty)) + return false; + } + for (auto ty: implResTy) { + if (!isa(ty)) + return false; + } + if (hasTensorArg || hasTensorResult) return true; @@ -333,15 +349,16 @@ struct ForwardOpInterface SmallVector tensorInputs; for (auto input : inputs) { - Value tensorIn = rewriter.create(loc, input); + Value tensorIn = (isa(input.getType())) ? 
input : + rewriter.create(loc, input); tensorInputs.push_back(tensorIn); } auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); SmallVector tensorOutputs(callOp.getResults()); for (auto [memrefOutput, tensorOutput] : llvm::zip(outputs, tensorOutputs)) { - Value castVal = rewriter.create(loc, memrefOutput.getType(), - tensorOutput); + Value castVal = (isa(tensorOutput.getType())) ? tensorOutput : + rewriter.create(loc, memrefOutput.getType(), tensorOutput); rewriter.create(loc, castVal, memrefOutput); } @@ -353,7 +370,7 @@ struct ForwardOpInterface SmallVector tapeMemrefOutputs; for (auto [tapeTensorOutput, memrefTapeOutput] : llvm::zip(tapeOutputs, forwardOp.getResultTypes())) { - Value castVal = + Value castVal = (isa(tapeTensorOutput.getType())) ? tapeTensorOutput : rewriter.create(loc, memrefTapeOutput, tapeTensorOutput); tapeMemrefOutputs.push_back(castVal); } @@ -504,6 +521,6 @@ void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRe AdjointOp::attachInterface(*ctx); BackpropOp::attachInterface(*ctx); ForwardOp::attachInterface(*ctx); - ReverseOp::attachInterface(*ctx); + //ReverseOp::attachInterface(*ctx); }); } \ No newline at end of file From a2af4576e0ce453ef6786ff0d83764603074717c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 15:46:56 -0400 Subject: [PATCH 080/183] Update ReverseOp --- .../BufferizableOpInterfaceImpl.cpp | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index b945cbfa69..b74f22509b 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -396,6 +396,21 @@ struct ReverseOpInterface auto reverseOp = cast(op); bool hasTensorArg = any_of(reverseOp.getArgumentTypes(), isaTensor); bool hasTensorResult = any_of(reverseOp.getResultTypes(), isaTensor); + + // Implementation must be bufferized. + auto implAttr = reverseOp.getImplementationAttr(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); + auto implArgTy = implOp.getArgumentTypes(); + auto implResTy = implOp.getResultTypes(); + for (auto ty: implArgTy) { + if (!isa(ty)) + return false; + } + for (auto ty: implResTy) { + if (!isa(ty)) + return false; + } + if (hasTensorArg || hasTensorResult) return true; @@ -488,12 +503,14 @@ struct ReverseOpInterface SmallVector tensorInputs; for (auto tapeElement : tapeElements) { - Value tensorIn = rewriter.create(loc, tapeElement); + Value tensorIn = (isa(tapeElement.getType())) ? tapeElement : + rewriter.create(loc, tapeElement); tensorInputs.push_back(tensorIn); } for (auto cotangent : cotangents) { - Value tensorIn = rewriter.create(loc, cotangent); + Value tensorIn = (isa(cotangent.getType())) ? cotangent : + rewriter.create(loc, cotangent); tensorInputs.push_back(tensorIn); } @@ -501,8 +518,8 @@ struct ReverseOpInterface SmallVector tensorOutputs(callOp.getResults()); for (auto [differential, tensorOutput] : llvm::zip(differentials, tensorOutputs)) { - Value castVal = rewriter.create(loc, differential.getType(), - tensorOutput); + Value castVal = (isa(tensorOutput.getType())) ? 
tensorOutput : + rewriter.create(loc, differential.getType(), tensorOutput); rewriter.create(loc, castVal, differential); } @@ -521,6 +538,6 @@ void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRe AdjointOp::attachInterface(*ctx); BackpropOp::attachInterface(*ctx); ForwardOp::attachInterface(*ctx); - //ReverseOp::attachInterface(*ctx); + ReverseOp::attachInterface(*ctx); }); } \ No newline at end of file From 723fa1139df2453a4226cefe321955dd4559fd30 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 15:58:14 -0400 Subject: [PATCH 081/183] Update conditions for ForwardOp and ReverseOp --- .../Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index b74f22509b..f5bfe7d91d 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -17,6 +17,7 @@ #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" #include "Gradient/Utils/GradientShape.h" #include "Quantum/IR/QuantumOps.h" +#include "llvm/ADT/STLExtras.h" using namespace mlir; using namespace catalyst::gradient; @@ -249,6 +250,8 @@ struct ForwardOpInterface auto forwardOp = cast(op); bool hasTensorArg = any_of(forwardOp.getArgumentTypes(), isaTensor); bool hasTensorResult = any_of(forwardOp.getResultTypes(), isaTensor); + bool hasTensorFuncInType = any_of(forwardOp.getFunctionType().getInputs(), isaTensor); + bool hasTensorFuncOutType = any_of(forwardOp.getFunctionType().getResults(), isaTensor); // Implementation must be bufferized. auto implAttr = forwardOp.getImplementationAttr(); @@ -264,7 +267,7 @@ struct ForwardOpInterface return false; } - if (hasTensorArg || hasTensorResult) + if (hasTensorArg || hasTensorResult || hasTensorFuncInType || hasTensorFuncOutType) return true; return false; @@ -396,6 +399,8 @@ struct ReverseOpInterface auto reverseOp = cast(op); bool hasTensorArg = any_of(reverseOp.getArgumentTypes(), isaTensor); bool hasTensorResult = any_of(reverseOp.getResultTypes(), isaTensor); + bool hasTensorFuncInType = any_of(reverseOp.getFunctionType().getInputs(), isaTensor); + bool hasTensorFuncOutType = any_of(reverseOp.getFunctionType().getResults(), isaTensor); // Implementation must be bufferized. 
auto implAttr = reverseOp.getImplementationAttr(); @@ -411,7 +416,7 @@ struct ReverseOpInterface return false; } - if (hasTensorArg || hasTensorResult) + if (hasTensorArg || hasTensorResult || hasTensorFuncInType || hasTensorFuncOutType) return true; return false; From 49b197b84d554e34621fb689f9b0ea6c7d17b228 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 16:55:29 -0400 Subject: [PATCH 082/183] Remove ForwardOp and ReverseOp workarounds --- .../BufferizableOpInterfaceImpl.cpp | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index f5bfe7d91d..ea716be399 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -252,21 +252,6 @@ struct ForwardOpInterface bool hasTensorResult = any_of(forwardOp.getResultTypes(), isaTensor); bool hasTensorFuncInType = any_of(forwardOp.getFunctionType().getInputs(), isaTensor); bool hasTensorFuncOutType = any_of(forwardOp.getFunctionType().getResults(), isaTensor); - - // Implementation must be bufferized. - auto implAttr = forwardOp.getImplementationAttr(); - auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); - auto implArgTy = implOp.getArgumentTypes(); - auto implResTy = implOp.getResultTypes(); - for (auto ty: implArgTy) { - if (!isa(ty)) - return false; - } - for (auto ty: implResTy) { - if (!isa(ty)) - return false; - } - if (hasTensorArg || hasTensorResult || hasTensorFuncInType || hasTensorFuncOutType) return true; @@ -401,21 +386,6 @@ struct ReverseOpInterface bool hasTensorResult = any_of(reverseOp.getResultTypes(), isaTensor); bool hasTensorFuncInType = any_of(reverseOp.getFunctionType().getInputs(), isaTensor); bool hasTensorFuncOutType = any_of(reverseOp.getFunctionType().getResults(), isaTensor); - - // Implementation must be bufferized. 
- auto implAttr = reverseOp.getImplementationAttr(); - auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); - auto implArgTy = implOp.getArgumentTypes(); - auto implResTy = implOp.getResultTypes(); - for (auto ty: implArgTy) { - if (!isa(ty)) - return false; - } - for (auto ty: implResTy) { - if (!isa(ty)) - return false; - } - if (hasTensorArg || hasTensorResult || hasTensorFuncInType || hasTensorFuncOutType) return true; From 0d4bfbd29244626ca39ee16a83738a5ffe794e53 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 17:42:10 -0400 Subject: [PATCH 083/183] Create templates for ForwardOp and ReverseOp preprocessing --- mlir/include/Gradient/Transforms/Passes.h | 1 + mlir/include/Gradient/Transforms/Passes.td | 12 ++++ mlir/include/Gradient/Transforms/Patterns.h | 1 + .../Catalyst/Transforms/RegisterAllPasses.cpp | 1 + mlir/lib/Gradient/Transforms/CMakeLists.txt | 2 + .../Transforms/PreprocessingPatterns.cpp | 69 +++++++++++++++++++ .../Transforms/gradient_preprocess.cpp | 58 ++++++++++++++++ 7 files changed, 144 insertions(+) create mode 100644 mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp create mode 100644 mlir/lib/Gradient/Transforms/gradient_preprocess.cpp diff --git a/mlir/include/Gradient/Transforms/Passes.h b/mlir/include/Gradient/Transforms/Passes.h index 40881b28fb..539e562905 100644 --- a/mlir/include/Gradient/Transforms/Passes.h +++ b/mlir/include/Gradient/Transforms/Passes.h @@ -22,6 +22,7 @@ namespace catalyst { // Pass creation for use in mlir opt tools std::unique_ptr createGradientBufferizationPass(); +std::unique_ptr createGradientPreprocessingPass(); std::unique_ptr createGradientLoweringPass(); std::unique_ptr createGradientConversionPass(); diff --git a/mlir/include/Gradient/Transforms/Passes.td b/mlir/include/Gradient/Transforms/Passes.td index 0b1fbfb76f..3c75e61ab2 100644 --- a/mlir/include/Gradient/Transforms/Passes.td +++ b/mlir/include/Gradient/Transforms/Passes.td @@ -71,4 +71,16 @@ def GradientConversionPass : Pass<"convert-gradient-to-llvm"> { ]; } +def GradientPreprocessingPass : Pass<"gradient-preprocess"> { + let summary = "Bufferize tensors in quantum operations."; + + let dependentDialects = [ + "bufferization::BufferizationDialect", + "memref::MemRefDialect", + "index::IndexDialect" + ]; + + let constructor = "catalyst::createGradientPreprocessingPass()"; +} + #endif // GRADIENT_PASSES diff --git a/mlir/include/Gradient/Transforms/Patterns.h b/mlir/include/Gradient/Transforms/Patterns.h index c21dac8f6c..bfa46f1130 100644 --- a/mlir/include/Gradient/Transforms/Patterns.h +++ b/mlir/include/Gradient/Transforms/Patterns.h @@ -22,6 +22,7 @@ namespace catalyst { namespace gradient { void populateBufferizationPatterns(mlir::TypeConverter &, mlir::RewritePatternSet &); +void populatePreprocessingPatterns(mlir::RewritePatternSet &); void populateLoweringPatterns(mlir::RewritePatternSet &); void populateConversionPatterns(mlir::LLVMTypeConverter &, mlir::RewritePatternSet &); diff --git a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp index ea0220c582..48d0110b77 100644 --- a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp +++ b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp @@ -25,6 +25,7 @@ void catalyst::registerAllCatalystPasses() mlir::registerPass(catalyst::createGradientBufferizationPass); mlir::registerPass(catalyst::createGradientLoweringPass); mlir::registerPass(catalyst::createGradientConversionPass); + 
mlir::registerPass(catalyst::createGradientPreprocessingPass); mlir::registerPass(catalyst::createScatterLoweringPass); mlir::registerPass(catalyst::createAdjointLoweringPass); mlir::registerPass(catalyst::createQuantumBufferizationPass); diff --git a/mlir/lib/Gradient/Transforms/CMakeLists.txt b/mlir/lib/Gradient/Transforms/CMakeLists.txt index 82c00aa58a..ab26e832bc 100644 --- a/mlir/lib/Gradient/Transforms/CMakeLists.txt +++ b/mlir/lib/Gradient/Transforms/CMakeLists.txt @@ -5,6 +5,8 @@ file(GLOB SRC BufferizableOpInterfaceImpl.cpp BufferizationPatterns.cpp gradient_bufferize.cpp + PreprocessingPatterns.cpp + gradient_preprocess.cpp LoweringPatterns.cpp gradient_lowering.cpp ConversionPatterns.cpp diff --git a/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp new file mode 100644 index 0000000000..3330c64919 --- /dev/null +++ b/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp @@ -0,0 +1,69 @@ +// Copyright 2022-2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "iostream" +#include "llvm/Support/raw_ostream.h" + +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Index/IR/IndexOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "Gradient/IR/GradientOps.h" +#include "Gradient/Transforms/Passes.h" +#include "Gradient/Utils/GradientShape.h" + +using namespace mlir; +using namespace catalyst::gradient; + +namespace { + +struct PreprocessForwardOp : public OpRewritePattern { + using mlir::OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult matchAndRewrite(ForwardOp op, + mlir::PatternRewriter &rewriter) const override + { + llvm::outs() << "forward\n"; + return success(); + } +}; + +struct PreprocessReverseOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult matchAndRewrite(ReverseOp op, + mlir::PatternRewriter &rewriter) const override + { + llvm::outs() << "reverse\n"; + return success(); + } +}; + +} // namespace + +namespace catalyst { +namespace gradient { + +void populatePreprocessingPatterns(RewritePatternSet &patterns) +{ + patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); +} + +} // namespace gradient +} // namespace catalyst diff --git a/mlir/lib/Gradient/Transforms/gradient_preprocess.cpp b/mlir/lib/Gradient/Transforms/gradient_preprocess.cpp new file mode 100644 index 0000000000..f470d5f8d9 --- /dev/null +++ b/mlir/lib/Gradient/Transforms/gradient_preprocess.cpp @@ -0,0 +1,58 @@ +// Copyright 2022-2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Index/IR/IndexDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include "Gradient/IR/GradientOps.h" +#include "Gradient/Transforms/Passes.h" +#include "Gradient/Transforms/Patterns.h" + +using namespace mlir; +using namespace catalyst::gradient; + +namespace catalyst { +namespace gradient { + +#define GEN_PASS_DEF_GRADIENTPREPROCESSINGPASS +#include "Gradient/Transforms/Passes.h.inc" + +struct GradientPreprocessingPass : impl::GradientPreprocessingPassBase { + using GradientPreprocessingPassBase::GradientPreprocessingPassBase; + + void runOnOperation() final + { + RewritePatternSet patterns(&getContext()); + populatePreprocessingPatterns(patterns); + + if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) { + return signalPassFailure(); + } + } +}; + +} // namespace gradient + +std::unique_ptr createGradientPreprocessingPass() +{ + return std::make_unique(); +} + +} // namespace catalyst From 79ce24e8aca26dcbf24fe81dfb18886f8eb23892 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 12 Sep 2024 18:23:48 -0400 Subject: [PATCH 084/183] Use preprocessing steps to add Func.call in ForwardOp and ReverseOp --- .../Transforms/PreprocessingPatterns.cpp | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp index 3330c64919..51b0bcd73d 100644 --- a/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PreprocessingPatterns.cpp @@ -21,6 +21,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" #include "Gradient/IR/GradientOps.h" @@ -38,7 +39,55 @@ struct PreprocessForwardOp : public OpRewritePattern { mlir::LogicalResult matchAndRewrite(ForwardOp op, mlir::PatternRewriter &rewriter) const override { + if(!op.getFunctionBody().empty()) + return failure(); + llvm::outs() << "forward\n"; + + auto argc = op.getArgc(); + auto resc = op.getResc(); + SmallVector inputs; + SmallVector differentials; + SmallVector outputs; + SmallVector cotangents; + + Block *block; + rewriter.modifyOpInPlace(op, [&] { block = op.addEntryBlock(); }); + + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(block); + auto params = op.getArguments(); + + for (size_t i = 0; i < argc * 2; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? differentials.push_back(val) : inputs.push_back(val); + } + + auto upperLimit = (argc * 2) + (resc * 2); + for (size_t i = argc * 2; i < upperLimit; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? 
cotangents.push_back(val) : outputs.push_back(val); + } + + auto implAttr = op.getImplementationAttr(); + auto impl = op.getImplementation(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); + auto implResTy = implOp.getResultTypes(); + Location loc = op.getLoc(); + + auto callOp = rewriter.create(loc, impl, implResTy, inputs); + SmallVector tensorOutputs(callOp.getResults()); + + auto tapeCount = op.getTape(); + SmallVector tapeOutputs; + tapeOutputs.insert(tapeOutputs.begin(), tensorOutputs.end() - tapeCount, + tensorOutputs.end()); + + auto F = rewriter.getIntegerAttr(rewriter.getI1Type(), 0); + rewriter.create(loc, tapeOutputs, F); + return success(); } }; @@ -49,7 +98,65 @@ struct PreprocessReverseOp : public OpRewritePattern { mlir::LogicalResult matchAndRewrite(ReverseOp op, mlir::PatternRewriter &rewriter) const override { + if(!op.getFunctionBody().empty()) + return failure(); + llvm::outs() << "reverse\n"; + + auto argc = op.getArgc(); + auto resc = op.getResc(); + SmallVector inputs; + SmallVector differentials; + SmallVector outputs; + SmallVector cotangents; + SmallVector tapeElements; + + Block *block; + rewriter.modifyOpInPlace(op, [&] { block = op.addEntryBlock(); }); + + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(block); + auto params = op.getArguments(); + + for (size_t i = 0; i < argc * 2; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? differentials.push_back(val) : inputs.push_back(val); + } + + auto upperLimit = (argc * 2) + (resc * 2); + for (size_t i = argc * 2; i < upperLimit; i++) { + bool isDup = (i % 2) != 0; + Value val = params[i]; + isDup ? cotangents.push_back(val) : outputs.push_back(val); + } + + auto tapeCount = op.getTape(); + auto uppestLimit = upperLimit + tapeCount; + for (size_t i = upperLimit; i < uppestLimit; i++) { + tapeElements.push_back(params[i]); + } + + auto implAttr = op.getImplementationAttr(); + auto impl = op.getImplementation(); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); + auto implResTy = implOp.getResultTypes(); + Location loc = op.getLoc(); + + SmallVector tensorInputs; + for (auto tapeElement : tapeElements) { + tensorInputs.push_back(tapeElement); + } + + for (auto cotangent : cotangents) { + tensorInputs.push_back(cotangent); + } + + rewriter.create(loc, impl, implResTy, tensorInputs); + + auto T = rewriter.getIntegerAttr(rewriter.getI1Type(), 1); + rewriter.create(loc, ValueRange{}, T); + return success(); } }; From 7d5fc1f35fb1ecbc07287a7943749f17383e6ca7 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 16 Sep 2024 17:07:28 -0400 Subject: [PATCH 085/183] Remove llvm scf patch --- mlir/Makefile | 4 - mlir/patches/scf-for-preprocessing.patch | 128 ----------------------- 2 files changed, 132 deletions(-) delete mode 100644 mlir/patches/scf-for-preprocessing.patch diff --git a/mlir/Makefile b/mlir/Makefile index 4c8b9ef4f3..debedd0c3a 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -14,7 +14,6 @@ ENABLE_ASAN?=OFF BUILD_TYPE?=Release LLVM_ROOT=$(MK_DIR)/llvm-project LLVM_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch -LLVM_SCF_PATCH_FILE=$(MK_DIR)/patches/scf-for-preprocessing.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -60,9 +59,6 @@ llvm: @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 --directory=$(LLVM_ROOT) < 
$(LLVM_MODULE_PATCH_FILE); \ fi - @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_SCF_PATCH_FILE) > /dev/null 2>&1; then \ - patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_SCF_PATCH_FILE); \ - fi cmake -G Ninja -S llvm-project/llvm -B $(LLVM_BUILD_DIR) \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DLLVM_BUILD_EXAMPLES=OFF \ diff --git a/mlir/patches/scf-for-preprocessing.patch b/mlir/patches/scf-for-preprocessing.patch deleted file mode 100644 index 769556de62..0000000000 --- a/mlir/patches/scf-for-preprocessing.patch +++ /dev/null @@ -1,128 +0,0 @@ -diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h -index 90b315e83a8cfd..6107219ea94ae1 100644 ---- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h -+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h -@@ -23,6 +23,9 @@ namespace mlir { - /// Creates a pass that bufferizes the SCF dialect. - std::unique_ptr createSCFBufferizePass(); - -+/// Creates a pass that preprocesses SCF loop before One-Shot Bufferize. -+std::unique_ptr createSCFLoopBufferizationPreprocessingPass(); -+ - /// Creates a pass that specializes for loop for unrolling and - /// vectorization. - std::unique_ptr createForLoopSpecializationPass(); -diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td -index 350611ad86873d..94d3e51a1c9044 100644 ---- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td -+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td -@@ -18,6 +18,27 @@ def SCFBufferize : Pass<"scf-bufferize"> { - "memref::MemRefDialect"]; - } - -+def SCFLoopBufferizationPreprocessing -+ : Pass<"scf-loop-bufferization-preprocessing"> { -+ let summary = "Preprocess loops before One-Shot Bufferize"; -+ -+ let description = [{ -+ Preprocess `scf.for` loops before running One-Shot Bufferize to support -+ loops where a yielded tensor is not equivalent to the respective iter_arg. -+ Such IR is currently not supported by One-Shot Bufferize. -+ -+ This pass inserts a `bufferization.materialize_in_destination` op for every -+ yielded tensor, such that the yielded value is guaranteed to materialize in -+ the future buffer of the iter_arg; this is done by copying the tensor -+ contents into the iter_arg buffer. Such memcpys are a no-op in case the -+ tensor contents already materialize in the iter_arg buffer. -+ }]; -+ -+ let constructor = "mlir::createSCFLoopBufferizationPreprocessingPass()"; -+ let dependentDialects = ["bufferization::BufferizationDialect", -+ "scf::SCFDialect"]; -+} -+ - // Note: Making these canonicalization patterns would require a dependency - // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa. 
- def SCFForLoopCanonicalization -diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp -index 21c618ab633f60..727c4fc7c6396e 100644 ---- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp -+++ b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp -@@ -17,6 +17,7 @@ - - namespace mlir { - #define GEN_PASS_DEF_SCFBUFFERIZE -+#define GEN_PASS_DEF_SCFLOOPBUFFERIZATIONPREPROCESSING - #include "mlir/Dialect/SCF/Transforms/Passes.h.inc" - } // namespace mlir - -@@ -40,8 +41,40 @@ struct SCFBufferizePass : public impl::SCFBufferizeBase { - return signalPassFailure(); - }; - }; -+ -+struct SCFLoopBufferizationPreprocessingPass -+ : public impl::SCFLoopBufferizationPreprocessingBase< -+ SCFLoopBufferizationPreprocessingPass> { -+ void runOnOperation() override { -+ OpBuilder builder(getOperation()->getContext()); -+ getOperation()->walk([&](scf::YieldOp yieldOp) { -+ builder.setInsertionPoint(yieldOp); -+ // TODO: Support scf.while. -+ auto forOp = dyn_cast(yieldOp->getParentOp()); -+ if (!forOp) -+ return WalkResult::skip(); -+ for (OpOperand &operand : yieldOp->getOpOperands()) { -+ auto tensorType = dyn_cast(operand.get().getType()); -+ if (!tensorType) -+ continue; -+ auto bbArg = forOp.getRegionIterArgs()[operand.getOperandNumber()]; -+ Value materialized = -+ builder -+ .create( -+ yieldOp.getLoc(), tensorType, operand.get(), bbArg) -+ .getResult(); -+ operand.set(materialized); -+ } -+ return WalkResult::advance(); -+ }); -+ } -+}; - } // namespace - - std::unique_ptr mlir::createSCFBufferizePass() { - return std::make_unique(); - } -+ -+std::unique_ptr mlir::createSCFLoopBufferizationPreprocessingPass() { -+ return std::make_unique(); -+} -diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir -new file mode 100644 -index 00000000000000..17661178245088 ---- /dev/null -+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-preprocessing.mlir -@@ -0,0 +1,23 @@ -+// RUN: mlir-opt %s -scf-loop-bufferization-preprocessing -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" -canonicalize | FileCheck %s -+ -+// CHECK-LABEL: func @conflict_in_loop( -+// CHECK-SAME: %[[A:.*]]: memref<10xf32> -+func.func @conflict_in_loop(%A: tensor<10xf32>, %f: f32, %idx: index, %lb: index, %ub: index, %step: index) -> f32 { -+ // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -+ %r = scf.for %i = %lb to %ub step %step iter_args(%tA = %A) -> (tensor<10xf32>) { -+ // CHECK: %[[alloc:.*]] = memref.alloc() -+ // CHECK: memref.copy %[[A]], %[[alloc]] -+ // CHECK: memref.store %{{.*}}, %[[alloc]] -+ %0 = tensor.insert %f into %tA[%i] : tensor<10xf32> -+ // CHECK: %[[read:.*]] = memref.load %[[A]] -+ %read = tensor.extract %tA[%idx] : tensor<10xf32> -+ // CHECK: vector.print %[[read]] -+ vector.print %read : f32 -+ // CHECK: memref.copy %[[alloc]], %[[A]] -+ scf.yield %0 : tensor<10xf32> -+ } -+ -+ // CHECK: memref.load %[[A]] -+ %f0 = tensor.extract %r[%step] : tensor<10xf32> -+ return %f0 : f32 -+} From 6a24347b880efc28b5172db5247960f4b693ce03 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 10:50:59 -0400 Subject: [PATCH 086/183] CI warning cleanup --- mlir/include/Gradient/Transforms/Passes.td | 12 ---- .../BufferizableOpInterfaceImpl.cpp | 1 - .../Catalyst/Transforms/RegisterAllPasses.cpp | 1 - mlir/lib/Gradient/IR/GradientDialect.cpp | 4 +- .../BufferizableOpInterfaceImpl.cpp | 72 ++++++++++--------- 
.../GradMethods/ClassicalJacobian.cpp | 7 +- .../GradMethods/PS_QuantumGradient.cpp | 7 +- mlir/lib/Quantum/IR/QuantumDialect.cpp | 6 +- 8 files changed, 53 insertions(+), 57 deletions(-) diff --git a/mlir/include/Gradient/Transforms/Passes.td b/mlir/include/Gradient/Transforms/Passes.td index 8173167d49..72a7dcf7c0 100644 --- a/mlir/include/Gradient/Transforms/Passes.td +++ b/mlir/include/Gradient/Transforms/Passes.td @@ -93,16 +93,4 @@ def GradientConversionPass : Pass<"convert-gradient-to-llvm"> { ]; } -def GradientPreprocessingPass : Pass<"gradient-preprocess"> { - let summary = "Bufferize tensors in quantum operations."; - - let dependentDialects = [ - "bufferization::BufferizationDialect", - "memref::MemRefDialect", - "index::IndexDialect" - ]; - - let constructor = "catalyst::createGradientPreprocessingPass()"; -} - #endif // GRADIENT_PASSES diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index fff38f63c2..15f287f1d5 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -124,7 +124,6 @@ struct CustomCallOpInterface struct CallbackOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< CallbackOpInterface, CallbackOp> { - static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const diff --git a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp index 13a2dee3e2..e0eb41f7f7 100644 --- a/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp +++ b/mlir/lib/Catalyst/Transforms/RegisterAllPasses.cpp @@ -27,7 +27,6 @@ void catalyst::registerAllCatalystPasses() mlir::registerPass(catalyst::createGradientPostprocessingPass); mlir::registerPass(catalyst::createGradientLoweringPass); mlir::registerPass(catalyst::createGradientConversionPass); - mlir::registerPass(catalyst::createGradientPreprocessingPass); mlir::registerPass(catalyst::createScatterLoweringPass); mlir::registerPass(catalyst::createAdjointLoweringPass); mlir::registerPass(catalyst::createQuantumBufferizationPass); diff --git a/mlir/lib/Gradient/IR/GradientDialect.cpp b/mlir/lib/Gradient/IR/GradientDialect.cpp index c80b1b5eb6..068079b99f 100644 --- a/mlir/lib/Gradient/IR/GradientDialect.cpp +++ b/mlir/lib/Gradient/IR/GradientDialect.cpp @@ -51,8 +51,8 @@ void GradientDialect::initialize() #include "Gradient/IR/GradientOps.cpp.inc" >(); addInterface(); - declarePromisedInterfaces(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index ea716be399..a5053f2598 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -47,8 +47,8 @@ Value generateAllocation(OpBuilder &builder, Location loc, Value reference) /// /// The allocation size and shape is deduced from a list of existing memref values. 
/// -void generateAllocations(RewriterBase &rewriter, Location loc, - SmallVectorImpl &allocations, ValueRange referenceValues) +void generateAllocations(RewriterBase &rewriter, Location loc, SmallVectorImpl &allocations, + ValueRange referenceValues) { for (Value memref : referenceValues) { allocations.push_back( @@ -106,16 +106,16 @@ struct AdjointOpInterface bufferArgs.push_back(*opBuffer); } - - rewriter.create(loc, TypeRange{}, adjointOp.getCalleeAttr(), adjointOp.getGradSize(), - bufferArgs, memrefValues); + rewriter.create(loc, TypeRange{}, adjointOp.getCalleeAttr(), + adjointOp.getGradSize(), bufferArgs, memrefValues); bufferization::replaceOpWithBufferizedValues(rewriter, op, memrefValues); return success(); } }; struct BackpropOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { @@ -153,15 +153,15 @@ struct BackpropOpInterface SmallVector bufferArgs; ValueRange operands = backpropOp.getArgs(); for (Value operand : operands) { - if(isa(operand.getType())) { + if (isa(operand.getType())) { FailureOr opBuffer = getBuffer(rewriter, operand, options); if (failed(opBuffer)) return failure(); bufferArgs.push_back(*opBuffer); - } else { + } + else { bufferArgs.push_back(operand); } - } std::vector diffArgs = @@ -238,10 +238,8 @@ struct BackpropOpInterface struct ForwardOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - ForwardOpInterface, ForwardOp> { - + ForwardOpInterface, ForwardOp> { static bool supportsUnstructuredControlFlow() { return true; } - bool hasTensorSemantics(Operation *op) const { auto isaTensor = llvm::IsaPred; @@ -305,10 +303,10 @@ struct ForwardOpInterface MemRefType::get(tensorType.getShape(), tensorType.getElementType())); } auto forwardTy = rewriter.getFunctionType(bufferArgs, emptyRets); - + Block *block; - rewriter.modifyOpInPlace(op, [&] { - forwardOp.setFunctionType(forwardTy); + rewriter.modifyOpInPlace(op, [&] { + forwardOp.setFunctionType(forwardTy); block = forwardOp.addEntryBlock(); }); @@ -337,16 +335,19 @@ struct ForwardOpInterface SmallVector tensorInputs; for (auto input : inputs) { - Value tensorIn = (isa(input.getType())) ? input : - rewriter.create(loc, input); + Value tensorIn = (isa(input.getType())) + ? input + : rewriter.create(loc, input); tensorInputs.push_back(tensorIn); } auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); SmallVector tensorOutputs(callOp.getResults()); for (auto [memrefOutput, tensorOutput] : llvm::zip(outputs, tensorOutputs)) { - Value castVal = (isa(tensorOutput.getType())) ? tensorOutput : - rewriter.create(loc, memrefOutput.getType(), tensorOutput); + Value castVal = (isa(tensorOutput.getType())) + ? tensorOutput + : rewriter.create( + loc, memrefOutput.getType(), tensorOutput); rewriter.create(loc, castVal, memrefOutput); } @@ -358,8 +359,10 @@ struct ForwardOpInterface SmallVector tapeMemrefOutputs; for (auto [tapeTensorOutput, memrefTapeOutput] : llvm::zip(tapeOutputs, forwardOp.getResultTypes())) { - Value castVal = (isa(tapeTensorOutput.getType())) ? tapeTensorOutput : - rewriter.create(loc, memrefTapeOutput, tapeTensorOutput); + Value castVal = (isa(tapeTensorOutput.getType())) + ? 
tapeTensorOutput + : rewriter.create(loc, memrefTapeOutput, + tapeTensorOutput); tapeMemrefOutputs.push_back(castVal); } @@ -372,7 +375,7 @@ struct ForwardOpInterface struct ReverseOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - ReverseOpInterface, ReverseOp> { + ReverseOpInterface, ReverseOp> { static bool supportsUnstructuredControlFlow() { return true; } @@ -440,10 +443,10 @@ struct ReverseOpInterface MemRefType::get(tensorType.getShape(), tensorType.getElementType())); } auto reverseTy = rewriter.getFunctionType(bufferArgs, emptyRets); - + Block *block; - rewriter.modifyOpInPlace(op, [&] { - reverseOp.setFunctionType(reverseTy); + rewriter.modifyOpInPlace(op, [&] { + reverseOp.setFunctionType(reverseTy); block = reverseOp.addEntryBlock(); }); @@ -472,20 +475,23 @@ struct ReverseOpInterface auto implAttr = reverseOp.getImplementationAttr(); auto impl = reverseOp.getImplementation(); - auto implOp = SymbolTable::lookupNearestSymbolFrom(reverseOp, implAttr); + auto implOp = + SymbolTable::lookupNearestSymbolFrom(reverseOp, implAttr); auto implResTy = implOp.getResultTypes(); Location loc = reverseOp.getLoc(); SmallVector tensorInputs; for (auto tapeElement : tapeElements) { - Value tensorIn = (isa(tapeElement.getType())) ? tapeElement : - rewriter.create(loc, tapeElement); + Value tensorIn = (isa(tapeElement.getType())) + ? tapeElement + : rewriter.create(loc, tapeElement); tensorInputs.push_back(tensorIn); } for (auto cotangent : cotangents) { - Value tensorIn = (isa(cotangent.getType())) ? cotangent : - rewriter.create(loc, cotangent); + Value tensorIn = (isa(cotangent.getType())) + ? cotangent + : rewriter.create(loc, cotangent); tensorInputs.push_back(tensorIn); } @@ -493,8 +499,10 @@ struct ReverseOpInterface SmallVector tensorOutputs(callOp.getResults()); for (auto [differential, tensorOutput] : llvm::zip(differentials, tensorOutputs)) { - Value castVal = (isa(tensorOutput.getType())) ? tensorOutput : - rewriter.create(loc, differential.getType(), tensorOutput); + Value castVal = (isa(tensorOutput.getType())) + ? 
tensorOutput + : rewriter.create( + loc, differential.getType(), tensorOutput); rewriter.create(loc, castVal, differential); } diff --git a/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp b/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp index fe0e4fb3e6..30baf98952 100644 --- a/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp +++ b/mlir/lib/Gradient/Transforms/GradMethods/ClassicalJacobian.cpp @@ -147,7 +147,8 @@ func::FuncOp genSplitPreprocessed(PatternRewriter &rewriter, Location loc, func: PatternRewriter::InsertionGuard insertGuard(rewriter); rewriter.setInsertionPointToStart(&splitFn.getBody().front()); Value paramsBuffer = rewriter.create(loc, paramsBufferType, paramCount); - Value paramsTensor = rewriter.create(loc, paramsBuffer, /*restrict=*/true); + Value paramsTensor = + rewriter.create(loc, paramsBuffer, /*restrict=*/true); qnodeQuantumArgs.push_back(paramsTensor); MemRefType paramsProcessedType = MemRefType::get({}, rewriter.getIndexType()); @@ -289,8 +290,8 @@ func::FuncOp genArgMapFunction(PatternRewriter &rewriter, Location loc, func::Fu else if (auto returnOp = dyn_cast(op)) { PatternRewriter::InsertionGuard insertionGuard(rewriter); rewriter.setInsertionPoint(returnOp); - Value paramsVector = - rewriter.create(loc, paramsVectorType, paramsBuffer, /*restrict=*/true) ; + Value paramsVector = rewriter.create( + loc, paramsVectorType, paramsBuffer, /*restrict=*/true); returnOp.getOperandsMutable().assign(paramsVector); } }); diff --git a/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp b/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp index 84c6c19ba4..c51277ed68 100644 --- a/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp +++ b/mlir/lib/Gradient/Transforms/GradMethods/PS_QuantumGradient.cpp @@ -58,7 +58,8 @@ static std::vector computePartialDerivative(PatternRewriter &rewriter, Lo { constexpr double shift = PI / 2; ShapedType shiftVectorType = RankedTensorType::get({numShifts}, rewriter.getF64Type()); - Value selectorVector = rewriter.create(loc, selectorBuffer, /*restrict=*/true); + Value selectorVector = + rewriter.create(loc, selectorBuffer, /*restrict=*/true); // Define the shift vectors (pos/neg) as sparse tensor constants. 
DenseElementsAttr nonZeroIndices = rewriter.getI64TensorAttr(currentShift); @@ -284,8 +285,8 @@ func::FuncOp ParameterShiftLowering::genQGradFunction(PatternRewriter &rewriter, std::vector gradientTensors; gradientTensors.reserve(gradResTypes.size()); for (Value gradientBuffer : gradientBuffers) { - gradientTensors.push_back( - rewriter.create(loc, gradientBuffer, /*restrict=*/true)); + gradientTensors.push_back(rewriter.create( + loc, gradientBuffer, /*restrict=*/true)); } op->setOperands(gradientTensors); } diff --git a/mlir/lib/Quantum/IR/QuantumDialect.cpp b/mlir/lib/Quantum/IR/QuantumDialect.cpp index c8c3788efd..d4d820326f 100644 --- a/mlir/lib/Quantum/IR/QuantumDialect.cpp +++ b/mlir/lib/Quantum/IR/QuantumDialect.cpp @@ -44,9 +44,9 @@ void QuantumDialect::initialize() #define GET_OP_LIST #include "Quantum/IR/QuantumOps.cpp.inc" >(); - declarePromisedInterfaces(); + declarePromisedInterfaces(); } //===----------------------------------------------------------------------===// From 73bef19a8f53f82c161cef13d5909e4fab11c804 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 13:56:39 -0400 Subject: [PATCH 087/183] Update llvm patch --- mlir/patches/moduleOp-bufferization.patch | 37 +++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch index 25384f99fb..683f1d51a3 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/moduleOp-bufferization.patch @@ -34,7 +34,7 @@ index 0b91d3d675b7..8bed0dfc5814 100644 void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp -index 053ea7935260..fed1c49d22be 100644 +index 9fbe574ec392..9749a71f3514 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -22,7 +22,7 @@ namespace mlir { @@ -842,7 +842,7 @@ index f2e9e839b7c4..5e5657980ba1 100644 module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir -index 75b29e22b4d2..21e615e60438 100644 +index 4b38db79bff3..0439844dc66c 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -1,16 +1,18 @@ @@ -894,3 +894,36 @@ index 75b29e22b4d2..21e615e60438 100644 } module attributes {transform.with_named_sequence} { +diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir +index 259475ebdbf4..85dbf6702332 100644 +--- a/mlir/test/Examples/transform/ChH/full.mlir ++++ b/mlir/test/Examples/transform/ChH/full.mlir +@@ -1,8 +1,6 @@ +-// RUN: mlir-opt %s --transform-interpreter \ +-// RUN: --test-transform-dialect-erase-schedule \ +-// RUN: --math-uplift-to-fma \ +-// RUN: --convert-bufferization-to-memref \ +-// RUN: --test-lower-to-llvm |\ ++// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" \ ++// RUN: --test-transform-dialect-erase-schedule |\ ++// RUN: mlir-opt -pass-pipeline='builtin.module(builtin.module(math-uplift-to-fma,convert-bufferization-to-memref,test-lower-to-llvm))' - |\ + // RUN: FileCheck %s + + // Fixed-size tensor types to be used in convolution. 
+@@ -19,6 +17,7 @@ + // tensors annotated with attributes from the `bufferization` dialect. These + // attributes hint the bufferization pass to assume buffers can be directly + // used for these tensors without reshaping. ++module @payload attributes { transform.target_tag = "payload" } { + func.func @conv( + %input: !tinput {bufferization.writable = false, + bufferization.access = "read", +@@ -84,7 +83,7 @@ func.func @conv( + + return %relued : !toutput + } +- ++} + // Module containing the transformation script to be applied. The attribute + // is required to correctly verify the use of named (macro-like) sequences. + module attributes { transform.with_named_sequence } { From 66e7b06e2980fdd2c256c5d441b1a3175c58f749 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 14:26:14 -0400 Subject: [PATCH 088/183] Make gradient.ReturnOp ReturnLike --- mlir/include/Gradient/IR/GradientOps.h | 1 + mlir/include/Gradient/IR/GradientOps.td | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/Gradient/IR/GradientOps.h b/mlir/include/Gradient/IR/GradientOps.h index c6f6afadfe..a54e110043 100644 --- a/mlir/include/Gradient/IR/GradientOps.h +++ b/mlir/include/Gradient/IR/GradientOps.h @@ -21,6 +21,7 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Interfaces/CallInterfaces.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" #include "Gradient/IR/GradientInterfaces.h" diff --git a/mlir/include/Gradient/IR/GradientOps.td b/mlir/include/Gradient/IR/GradientOps.td index fbb43129fd..e30c4bf982 100644 --- a/mlir/include/Gradient/IR/GradientOps.td +++ b/mlir/include/Gradient/IR/GradientOps.td @@ -17,6 +17,7 @@ include "mlir/Interfaces/FunctionInterfaces.td" include "mlir/Interfaces/CallInterfaces.td" +include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/IR/BuiltinAttributes.td" include "mlir/IR/OpBase.td" @@ -388,7 +389,7 @@ def ReverseOp : Gradient_Op<"reverse", } def ReturnOp : Gradient_Op<"return", - [Terminator, ParentOneOf<["ForwardOp", "ReverseOp"]>]> { + [ReturnLike, Terminator, ParentOneOf<["ForwardOp", "ReverseOp"]>]> { let summary = "Return tapes or nothing"; From ea4f2d72227cd02616308da56577da6af8dd44e9 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 15:04:34 -0400 Subject: [PATCH 089/183] Draft new ForwardOp bufferization --- .../BufferizableOpInterfaceImpl.cpp | 95 ++++++++----------- 1 file changed, 38 insertions(+), 57 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index a5053f2598..1340c0bb7a 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -240,6 +240,7 @@ struct ForwardOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< ForwardOpInterface, ForwardOp> { static bool supportsUnstructuredControlFlow() { return true; } + bool hasTensorSemantics(Operation *op) const { auto isaTensor = llvm::IsaPred; @@ -280,14 +281,7 @@ struct ForwardOpInterface { auto forwardOp = cast(op); - auto argc = forwardOp.getArgc(); - auto resc = forwardOp.getResc(); - SmallVector inputs; - SmallVector differentials; - SmallVector outputs; - SmallVector cotangents; - - // Update signature + // Update ForwardOp's signature auto argTys = forwardOp.getArgumentTypes(); auto retTys = forwardOp.getResultTypes(); SmallVector 
emptyRets; @@ -303,72 +297,59 @@ struct ForwardOpInterface MemRefType::get(tensorType.getShape(), tensorType.getElementType())); } auto forwardTy = rewriter.getFunctionType(bufferArgs, emptyRets); - - Block *block; rewriter.modifyOpInPlace(op, [&] { forwardOp.setFunctionType(forwardTy); - block = forwardOp.addEntryBlock(); }); + // Get ForwardOp's block. + auto &block = forwardOp.getBody().front(); PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(block); - auto params = forwardOp.getArguments(); - - for (size_t i = 0; i < argc * 2; i++) { - bool isDup = (i % 2) != 0; - Value val = params[i]; - isDup ? differentials.push_back(val) : inputs.push_back(val); - } + rewriter.setInsertionPointToStart(&block); - auto upperLimit = (argc * 2) + (resc * 2); - for (size_t i = argc * 2; i < upperLimit; i++) { - bool isDup = (i % 2) != 0; - Value val = params[i]; - isDup ? cotangents.push_back(val) : outputs.push_back(val); - } + auto argc = forwardOp.getArgc(); + auto resc = forwardOp.getResc(); + // Get callee's implementation. auto implAttr = forwardOp.getImplementationAttr(); auto impl = forwardOp.getImplementation(); auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); auto implResTy = implOp.getResultTypes(); Location loc = forwardOp.getLoc(); - SmallVector tensorInputs; + // Create to_tensor if callee is not yet bufferized. + SmallVector inputs(forwardOp.getArguments()); + SmallVector calleeInputs; for (auto input : inputs) { - Value tensorIn = (isa(input.getType())) - ? input + auto tensorIn = (isa(input.getType())) ? input : rewriter.create(loc, input); - tensorInputs.push_back(tensorIn); + calleeInputs.push_back(tensorIn); } - auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); - SmallVector tensorOutputs(callOp.getResults()); - for (auto [memrefOutput, tensorOutput] : llvm::zip(outputs, tensorOutputs)) { - Value castVal = (isa(tensorOutput.getType())) - ? tensorOutput - : rewriter.create( - loc, memrefOutput.getType(), tensorOutput); - rewriter.create(loc, castVal, memrefOutput); - } - - auto tapeCount = forwardOp.getTape(); - SmallVector tapeOutputs; - tapeOutputs.insert(tapeOutputs.begin(), tensorOutputs.end() - tapeCount, - tensorOutputs.end()); - - SmallVector tapeMemrefOutputs; - for (auto [tapeTensorOutput, memrefTapeOutput] : - llvm::zip(tapeOutputs, forwardOp.getResultTypes())) { - Value castVal = (isa(tapeTensorOutput.getType())) - ? tapeTensorOutput - : rewriter.create(loc, memrefTapeOutput, - tapeTensorOutput); - tapeMemrefOutputs.push_back(castVal); - } - - auto F = rewriter.getIntegerAttr(rewriter.getI1Type(), 0); - rewriter.create(loc, tapeMemrefOutputs, F); + forwardOp.walk([&](func::CallOp callOp) { + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(callOp); + SmallVector inputs(callOp.getOperands()); + SmallVector calleeInputs; + for (auto input : inputs) { + auto tensorIn = (isa(input.getType())) ? input : + rewriter.create(loc, input); + calleeInputs.push_back(tensorIn); + } + rewriter.replaceOpWithNewOp(callOp, impl, implResTy, calleeInputs); + }); + forwardOp.walk([&](ReturnOp returnOp) { + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(returnOp); + SmallVector inputs(returnOp.getOperands()); + SmallVector returnInputs; + for (auto input : inputs) { + auto tensorIn = (isa(input.getType())) ? 
input : + rewriter.create(loc, input); + returnInputs.push_back(tensorIn); + } + rewriter.replaceOpWithNewOp(returnOp, returnInputs, returnOp.getEmpty()); + }); return success(); } }; @@ -376,7 +357,6 @@ struct ForwardOpInterface struct ReverseOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< ReverseOpInterface, ReverseOp> { - static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const @@ -417,6 +397,7 @@ struct ReverseOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { + return failure(); auto reverseOp = cast(op); auto argc = reverseOp.getArgc(); From accf60a92d11447b12e852cd064100b35d017489 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 16:27:36 -0400 Subject: [PATCH 090/183] Add getBufferType method to ForwardOp and ReverseOp Interface --- .../BufferizableOpInterfaceImpl.cpp | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 1340c0bb7a..96c9752dc6 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -24,6 +24,28 @@ using namespace catalyst::gradient; namespace { +static BaseMemRefType +getBufferizedFunctionArgType(FunctionOpInterface funcOp, int64_t index, + const bufferization::BufferizationOptions &options) { + auto tensorType = + dyn_cast(funcOp.getArgument(index).getType()); + assert(tensorType && "expected TensorType"); + + BaseMemRefType memrefType = options.functionArgTypeConverterFn( + tensorType, *options.defaultMemorySpaceFn(tensorType), funcOp, options); + + auto layoutAttr = funcOp.getArgAttrOfType( + index, bufferization::BufferizationDialect::kBufferLayoutAttrName); + if (!layoutAttr) + return memrefType; + + auto rankedMemrefType = dyn_cast(memrefType); + assert(rankedMemrefType && "buffer layout not supported on unranked tensors"); + return MemRefType::get( + rankedMemrefType.getShape(), rankedMemrefType.getElementType(), + layoutAttr.getValue(), rankedMemrefType.getMemorySpace()); +} + Value generateAllocation(OpBuilder &builder, Location loc, Value reference) { auto origMemrefType = cast(reference.getType()); @@ -276,6 +298,21 @@ struct ForwardOpInterface return {}; } + FailureOr + getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const { + auto funcOp = cast(op); + auto bbArg = cast(value); + + // Function arguments are special. + if (bbArg.getOwner() == &funcOp.getBody().front()) + return getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), + options); + + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: + getBufferType(op, value, options, invocationStack); + } + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { @@ -394,6 +431,21 @@ struct ReverseOpInterface return {}; } + FailureOr + getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const { + auto funcOp = cast(op); + auto bbArg = cast(value); + + // Function arguments are special. 
+ if (bbArg.getOwner() == &funcOp.getBody().front()) + return getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), + options); + + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: + getBufferType(op, value, options, invocationStack); + } + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { From 99c517a8da8853995fa6e3c8c9395172cbb84a6c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 16:47:17 -0400 Subject: [PATCH 091/183] Cleanup unused read-write flag in ForwardOp and ReverseOp --- .../BufferizableOpInterfaceImpl.cpp | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 96c9752dc6..c9b8a7c84e 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -279,18 +279,6 @@ struct ForwardOpInterface return false; } - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return true; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return true; - } - bufferization::AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const @@ -412,18 +400,6 @@ struct ReverseOpInterface return false; } - bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return true; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return true; - } - bufferization::AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const From c7768beec2788a20d2477035270f0bc8afe2e610 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 17 Sep 2024 16:55:35 -0400 Subject: [PATCH 092/183] Copy methods from FuncOp to ForwardOp and ReverseOp --- .../BufferizableOpInterfaceImpl.cpp | 49 +++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index c9b8a7c84e..8234fe35e1 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -11,6 +11,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Transforms/DialectConversion.h" #include "Gradient/IR/GradientOps.h" @@ -46,6 +47,18 @@ getBufferizedFunctionArgType(FunctionOpInterface funcOp, int64_t index, layoutAttr.getValue(), rankedMemrefType.getMemorySpace()); } +static ReturnOp getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { + ReturnOp returnOp; + for (Block &b : funcOp.getFunctionBody()) { + if (auto candidateOp = dyn_cast(b.getTerminator())) { + if (returnOp) + return nullptr; + returnOp = candidateOp; + } + } + return returnOp; +} + Value generateAllocation(OpBuilder &builder, Location loc, Value reference) { auto origMemrefType = cast(reference.getType()); @@ -279,11 +292,10 @@ struct ForwardOpInterface return false; } - bufferization::AliasingValueList 
-    getAliasingValues(Operation *op, OpOperand &opOperand,
-                      const bufferization::AnalysisState &state) const
-    {
-        return {};
+    bufferization::AliasingOpOperandList
+    getAliasingOpOperands(Operation *op, Value value,
+                          const bufferization::AnalysisState &state) const {
+        return getAliasingBranchOpOperands(op, cast(value), state);
     }
 
     FailureOr
@@ -422,6 +442,15 @@ struct ReverseOpInterface
             getBufferType(op, value, options, invocationStack);
     }
 
+    LogicalResult verifyAnalysis(Operation *op,
+                                 const bufferization::AnalysisState &state) const {
+        auto funcOp = cast(op);
+        // TODO: func.func with multiple returns are not supported.
+        if (!getAssumedUniqueReturnOp(funcOp) && !funcOp.isExternal())
+            return op->emitOpError("op without unique func.return is not supported");
+        return success();
+    }
+
     LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                             const bufferization::BufferizationOptions &options) const
     {

From 42d224adf5a95c72e060e51029496795a2102de9 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 18 Sep 2024 11:55:49 -0400
Subject: [PATCH 093/183] Draft new ReverseOp Interface and fix its getBufferType

---
 .../BufferizableOpInterfaceImpl.cpp           | 102 +++++++-----------
 1 file changed, 40 insertions(+), 62 deletions(-)

diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
index 8234fe35e1..a0ef375cf7 100644
--- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -430,7 +430,7 @@ struct ReverseOpInterface
     FailureOr
     getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options,
                   SmallVector &invocationStack) const {
-        auto funcOp = cast<ForwardOp>(op);
+        auto funcOp = cast<ReverseOp>(op);
         auto bbArg = cast(value);
 
         // Function arguments are special.
@@ -454,18 +454,9 @@ struct ReverseOpInterface LogicalResult bufferize(Operation *op, RewriterBase &rewriter, const bufferization::BufferizationOptions &options) const { - return failure(); auto reverseOp = cast(op); - auto argc = reverseOp.getArgc(); - auto resc = reverseOp.getResc(); - SmallVector inputs; - SmallVector differentials; - SmallVector outputs; - SmallVector cotangents; - SmallVector tapeElements; - - // Update signature + // Update ReverseOp's signature auto argTys = reverseOp.getArgumentTypes(); auto retTys = reverseOp.getResultTypes(); SmallVector emptyRets; @@ -481,72 +472,59 @@ struct ReverseOpInterface MemRefType::get(tensorType.getShape(), tensorType.getElementType())); } auto reverseTy = rewriter.getFunctionType(bufferArgs, emptyRets); - - Block *block; rewriter.modifyOpInPlace(op, [&] { reverseOp.setFunctionType(reverseTy); - block = reverseOp.addEntryBlock(); }); + // Get ForwardOp's block. + auto &block = reverseOp.getBody().front(); PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(block); - auto params = reverseOp.getArguments(); - - for (size_t i = 0; i < argc * 2; i++) { - bool isDup = (i % 2) != 0; - Value val = params[i]; - isDup ? differentials.push_back(val) : inputs.push_back(val); - } - - auto upperLimit = (argc * 2) + (resc * 2); - for (size_t i = argc * 2; i < upperLimit; i++) { - bool isDup = (i % 2) != 0; - Value val = params[i]; - isDup ? cotangents.push_back(val) : outputs.push_back(val); - } + rewriter.setInsertionPointToStart(&block); - auto tapeCount = reverseOp.getTape(); - auto uppestLimit = upperLimit + tapeCount; - for (size_t i = upperLimit; i < uppestLimit; i++) { - tapeElements.push_back(params[i]); - } + auto argc = reverseOp.getArgc(); + auto resc = reverseOp.getResc(); + // Get callee's implementation. auto implAttr = reverseOp.getImplementationAttr(); auto impl = reverseOp.getImplementation(); - auto implOp = - SymbolTable::lookupNearestSymbolFrom(reverseOp, implAttr); + auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); auto implResTy = implOp.getResultTypes(); Location loc = reverseOp.getLoc(); - SmallVector tensorInputs; - for (auto tapeElement : tapeElements) { - Value tensorIn = (isa(tapeElement.getType())) - ? tapeElement - : rewriter.create(loc, tapeElement); - tensorInputs.push_back(tensorIn); - } - - for (auto cotangent : cotangents) { - Value tensorIn = (isa(cotangent.getType())) - ? cotangent - : rewriter.create(loc, cotangent); - tensorInputs.push_back(tensorIn); - } - - auto callOp = rewriter.create(loc, impl, implResTy, tensorInputs); - SmallVector tensorOutputs(callOp.getResults()); - - for (auto [differential, tensorOutput] : llvm::zip(differentials, tensorOutputs)) { - Value castVal = (isa(tensorOutput.getType())) - ? tensorOutput - : rewriter.create( - loc, differential.getType(), tensorOutput); - rewriter.create(loc, castVal, differential); + // Create to_tensor if callee is not yet bufferized. + SmallVector inputs(reverseOp.getArguments()); + SmallVector calleeInputs; + for (auto input : inputs) { + auto tensorIn = (isa(input.getType())) ? 
input + : rewriter.create(loc, input); + calleeInputs.push_back(tensorIn); } - auto T = rewriter.getIntegerAttr(rewriter.getI1Type(), 1); - rewriter.create(loc, ValueRange{}, T); + reverseOp.walk([&](func::CallOp callOp) { + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(callOp); + SmallVector inputs(callOp.getOperands()); + SmallVector calleeInputs; + for (auto input : inputs) { + auto tensorIn = (isa(input.getType())) ? input : + rewriter.create(loc, input); + calleeInputs.push_back(tensorIn); + } + rewriter.replaceOpWithNewOp(callOp, impl, implResTy, calleeInputs); + }); + reverseOp.walk([&](ReturnOp returnOp) { + PatternRewriter::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(returnOp); + SmallVector inputs(returnOp.getOperands()); + SmallVector returnInputs; + for (auto input : inputs) { + auto tensorIn = (isa(input.getType())) ? input : + rewriter.create(loc, input); + returnInputs.push_back(tensorIn); + } + rewriter.replaceOpWithNewOp(returnOp, returnInputs, returnOp.getEmpty()); + }); return success(); } }; From fedd6686d425387dbec2e31f18cd0cc55a9cac08 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 12:35:56 -0400 Subject: [PATCH 094/183] Make ForwardOp and ReverseOp follow FuncOp bufferization steps --- .../BufferizableOpInterfaceImpl.cpp | 240 ++++++++---------- 1 file changed, 108 insertions(+), 132 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index a0ef375cf7..2a7e8497d1 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -301,12 +301,12 @@ struct ForwardOpInterface FailureOr getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, SmallVector &invocationStack) const { - auto funcOp = cast(op); + auto forwardOp = cast(op); auto bbArg = cast(value); // Function arguments are special. - if (bbArg.getOwner() == &funcOp.getBody().front()) - return getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), + if (bbArg.getOwner() == &forwardOp.getBody().front()) + return getBufferizedFunctionArgType(forwardOp, bbArg.getArgNumber(), options); return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: @@ -315,9 +315,9 @@ struct ForwardOpInterface LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const { - auto funcOp = cast(op); + auto forwardOp = cast(op); // TODO: func.func with multiple returns are not supported. - if (!getAssumedUniqueReturnOp(funcOp) && !funcOp.isExternal()) + if (!getAssumedUniqueReturnOp(forwardOp)) return op->emitOpError("op without unique func.return is not supported"); return success(); } @@ -326,76 +326,64 @@ struct ForwardOpInterface const bufferization::BufferizationOptions &options) const { auto forwardOp = cast(op); - - // Update ForwardOp's signature - auto argTys = forwardOp.getArgumentTypes(); - auto retTys = forwardOp.getResultTypes(); - SmallVector emptyRets; - SmallVector args(argTys.begin(), argTys.end()); - args.insert(args.end(), retTys.begin(), retTys.end()); - SmallVector bufferArgs; - for (Type ty : args) { - auto tensorType = dyn_cast(ty); - if (!tensorType) - bufferArgs.push_back(ty); - else - bufferArgs.push_back( - MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + FunctionType funcType = forwardOp.getFunctionType(); + + // Construct the bufferized function type. 
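+        // Tensor arguments are mapped through the function-boundary type
+        // converter via getBufferizedFunctionArgType; non-tensor arguments
+        // are forwarded unchanged.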
+ SmallVector argTypes; + for (const auto &it : llvm::enumerate(funcType.getInputs())) { + Type argType = it.value(); + if (dyn_cast(argType)) { + argTypes.push_back( + getBufferizedFunctionArgType(forwardOp, it.index(), options)); + continue; + } + argTypes.push_back(argType); } - auto forwardTy = rewriter.getFunctionType(bufferArgs, emptyRets); - rewriter.modifyOpInPlace(op, [&] { - forwardOp.setFunctionType(forwardTy); - }); - // Get ForwardOp's block. - auto &block = forwardOp.getBody().front(); - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(&block); - - auto argc = forwardOp.getArgc(); - auto resc = forwardOp.getResc(); - - // Get callee's implementation. - auto implAttr = forwardOp.getImplementationAttr(); - auto impl = forwardOp.getImplementation(); - auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); - auto implResTy = implOp.getResultTypes(); - Location loc = forwardOp.getLoc(); - - // Create to_tensor if callee is not yet bufferized. - SmallVector inputs(forwardOp.getArguments()); - SmallVector calleeInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? input - : rewriter.create(loc, input); - calleeInputs.push_back(tensorIn); - } + ReturnOp returnOp = getAssumedUniqueReturnOp(forwardOp); + assert(returnOp && "expected func with single return op"); + Location loc = returnOp.getLoc(); - forwardOp.walk([&](func::CallOp callOp) { - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPoint(callOp); - SmallVector inputs(callOp.getOperands()); - SmallVector calleeInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? input : - rewriter.create(loc, input); - calleeInputs.push_back(tensorIn); + // 1. Bufferize every block. + for (Block &block : forwardOp.getBody()) + if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, + options))) + return failure(); + + // 2. For each result, keep track of which inplace argument it reuses. + SmallVector returnValues; + for (OpOperand &returnOperand : returnOp->getOpOperands()) { + Value returnVal = returnOperand.get(); + auto tensorType = dyn_cast(returnVal.getType()); + rewriter.setInsertionPoint(returnOp); + + // If not a tensor type just forward it. + if (!tensorType) { + returnValues.push_back(returnVal); + continue; } - rewriter.replaceOpWithNewOp(callOp, impl, implResTy, calleeInputs); - }); + // Note: If `inferFunctionResultLayout = true`, cast are later folded + // away. + BaseMemRefType resultType = options.functionArgTypeConverterFn( + tensorType, *options.defaultMemorySpaceFn(tensorType), forwardOp, + options); + Value toMemrefOp = rewriter.create( + loc, resultType, returnVal); + returnValues.push_back(toMemrefOp); + } + + // 3. Rewrite the terminator. forwardOp.walk([&](ReturnOp returnOp) { PatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(returnOp); - SmallVector inputs(returnOp.getOperands()); - SmallVector returnInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? input : - rewriter.create(loc, input); - returnInputs.push_back(tensorIn); - } - rewriter.replaceOpWithNewOp(returnOp, returnInputs, returnOp.getEmpty()); + rewriter.replaceOpWithNewOp(returnOp, returnValues, returnOp.getEmpty()); }); + + // 4. Rewrite the FuncOp type to buffer form. 
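+        // Argument types come from the boundary conversion above; result
+        // types are taken from the bufferized terminator operands.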
+ forwardOp.setType(FunctionType::get(op->getContext(), argTypes, + ValueRange(returnValues).getTypes())); + return success(); } }; @@ -430,12 +418,12 @@ struct ReverseOpInterface FailureOr getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, SmallVector &invocationStack) const { - auto funcOp = cast(op); + auto reverseOp = cast(op); auto bbArg = cast(value); // Function arguments are special. - if (bbArg.getOwner() == &funcOp.getBody().front()) - return getBufferizedFunctionArgType(funcOp, bbArg.getArgNumber(), + if (bbArg.getOwner() == &reverseOp.getBody().front()) + return getBufferizedFunctionArgType(reverseOp, bbArg.getArgNumber(), options); return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: @@ -444,9 +432,9 @@ struct ReverseOpInterface LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const { - auto funcOp = cast(op); + auto reverseOp = cast(op); // TODO: func.func with multiple returns are not supported. - if (!getAssumedUniqueReturnOp(funcOp) && !funcOp.isExternal()) + if (!getAssumedUniqueReturnOp(reverseOp)) return op->emitOpError("op without unique func.return is not supported"); return success(); } @@ -455,76 +443,64 @@ struct ReverseOpInterface const bufferization::BufferizationOptions &options) const { auto reverseOp = cast(op); - - // Update ReverseOp's signature - auto argTys = reverseOp.getArgumentTypes(); - auto retTys = reverseOp.getResultTypes(); - SmallVector emptyRets; - SmallVector args(argTys.begin(), argTys.end()); - args.insert(args.end(), retTys.begin(), retTys.end()); - SmallVector bufferArgs; - for (Type ty : args) { - auto tensorType = dyn_cast(ty); - if (!tensorType) - bufferArgs.push_back(ty); - else - bufferArgs.push_back( - MemRefType::get(tensorType.getShape(), tensorType.getElementType())); + FunctionType funcType = reverseOp.getFunctionType(); + + // Construct the bufferized function type. + SmallVector argTypes; + for (const auto &it : llvm::enumerate(funcType.getInputs())) { + Type argType = it.value(); + if (dyn_cast(argType)) { + argTypes.push_back( + getBufferizedFunctionArgType(reverseOp, it.index(), options)); + continue; + } + argTypes.push_back(argType); } - auto reverseTy = rewriter.getFunctionType(bufferArgs, emptyRets); - rewriter.modifyOpInPlace(op, [&] { - reverseOp.setFunctionType(reverseTy); - }); - // Get ForwardOp's block. - auto &block = reverseOp.getBody().front(); - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPointToStart(&block); - - auto argc = reverseOp.getArgc(); - auto resc = reverseOp.getResc(); - - // Get callee's implementation. - auto implAttr = reverseOp.getImplementationAttr(); - auto impl = reverseOp.getImplementation(); - auto implOp = SymbolTable::lookupNearestSymbolFrom(op, implAttr); - auto implResTy = implOp.getResultTypes(); - Location loc = reverseOp.getLoc(); - - // Create to_tensor if callee is not yet bufferized. - SmallVector inputs(reverseOp.getArguments()); - SmallVector calleeInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? 
input - : rewriter.create(loc, input); - calleeInputs.push_back(tensorIn); - } + ReturnOp returnOp = getAssumedUniqueReturnOp(reverseOp); + assert(returnOp && "expected func with single return op"); + Location loc = returnOp.getLoc(); - reverseOp.walk([&](func::CallOp callOp) { - PatternRewriter::InsertionGuard guard(rewriter); - rewriter.setInsertionPoint(callOp); - SmallVector inputs(callOp.getOperands()); - SmallVector calleeInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? input : - rewriter.create(loc, input); - calleeInputs.push_back(tensorIn); + // 1. Bufferize every block. + for (Block &block : reverseOp.getBody()) + if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, + options))) + return failure(); + + // 2. For each result, keep track of which inplace argument it reuses. + SmallVector returnValues; + for (OpOperand &returnOperand : returnOp->getOpOperands()) { + Value returnVal = returnOperand.get(); + auto tensorType = dyn_cast(returnVal.getType()); + rewriter.setInsertionPoint(returnOp); + + // If not a tensor type just forward it. + if (!tensorType) { + returnValues.push_back(returnVal); + continue; } - rewriter.replaceOpWithNewOp(callOp, impl, implResTy, calleeInputs); - }); + // Note: If `inferFunctionResultLayout = true`, cast are later folded + // away. + BaseMemRefType resultType = options.functionArgTypeConverterFn( + tensorType, *options.defaultMemorySpaceFn(tensorType), reverseOp, + options); + Value toMemrefOp = rewriter.create( + loc, resultType, returnVal); + returnValues.push_back(toMemrefOp); + } + + // 3. Rewrite the terminator. reverseOp.walk([&](ReturnOp returnOp) { PatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(returnOp); - SmallVector inputs(returnOp.getOperands()); - SmallVector returnInputs; - for (auto input : inputs) { - auto tensorIn = (isa(input.getType())) ? input : - rewriter.create(loc, input); - returnInputs.push_back(tensorIn); - } - rewriter.replaceOpWithNewOp(returnOp, returnInputs, returnOp.getEmpty()); + rewriter.replaceOpWithNewOp(returnOp, returnValues, returnOp.getEmpty()); }); + + // 4. Rewrite the FuncOp type to buffer form. 
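+        // Mirrors step 4 of ForwardOp: converted argument types plus the
+        // memref types of the rewritten return values.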
+ reverseOp.setType(FunctionType::get(op->getContext(), argTypes, + ValueRange(returnValues).getTypes())); + return success(); } }; From 0473d72fae5875589cbb521d434e5c0851ab5340 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 12:43:19 -0400 Subject: [PATCH 095/183] Make gradient-preprocess happen before eliminate-empty-tensors to pass ReturnOp check --- frontend/catalyst/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index f515699d5c..2041560145 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -206,9 +206,9 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt BUFFERIZATION_PASS = ( "BufferizationPass", [ + "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "gradient-preprocess", "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}", "canonicalize", "gradient-postprocess", From f5c4da0e9fe1af82fed401316e3f2dddc1f0ef37 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 12:48:49 -0400 Subject: [PATCH 096/183] Reformatting --- .../BufferizableOpInterfaceImpl.cpp | 126 +++++++++--------- 1 file changed, 61 insertions(+), 65 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 2a7e8497d1..19fbb27a1c 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -27,9 +27,9 @@ namespace { static BaseMemRefType getBufferizedFunctionArgType(FunctionOpInterface funcOp, int64_t index, - const bufferization::BufferizationOptions &options) { - auto tensorType = - dyn_cast(funcOp.getArgument(index).getType()); + const bufferization::BufferizationOptions &options) +{ + auto tensorType = dyn_cast(funcOp.getArgument(index).getType()); assert(tensorType && "expected TensorType"); BaseMemRefType memrefType = options.functionArgTypeConverterFn( @@ -42,21 +42,21 @@ getBufferizedFunctionArgType(FunctionOpInterface funcOp, int64_t index, auto rankedMemrefType = dyn_cast(memrefType); assert(rankedMemrefType && "buffer layout not supported on unranked tensors"); - return MemRefType::get( - rankedMemrefType.getShape(), rankedMemrefType.getElementType(), - layoutAttr.getValue(), rankedMemrefType.getMemorySpace()); + return MemRefType::get(rankedMemrefType.getShape(), rankedMemrefType.getElementType(), + layoutAttr.getValue(), rankedMemrefType.getMemorySpace()); } -static ReturnOp getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { - ReturnOp returnOp; - for (Block &b : funcOp.getFunctionBody()) { - if (auto candidateOp = dyn_cast(b.getTerminator())) { - if (returnOp) - return nullptr; - returnOp = candidateOp; +static ReturnOp getAssumedUniqueReturnOp(FunctionOpInterface funcOp) +{ + ReturnOp returnOp; + for (Block &b : funcOp.getFunctionBody()) { + if (auto candidateOp = dyn_cast(b.getTerminator())) { + if (returnOp) + return nullptr; + returnOp = candidateOp; + } } - } - return returnOp; + return returnOp; } Value generateAllocation(OpBuilder &builder, Location loc, Value reference) @@ -294,31 +294,32 @@ struct ForwardOpInterface bufferization::AliasingOpOperandList getAliasingOpOperands(Operation *op, Value value, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return getAliasingBranchOpOperands(op, cast(value), 
state); } - FailureOr - getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, - SmallVector &invocationStack) const { + FailureOr getBufferType(Operation *op, Value value, + const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const + { auto forwardOp = cast(op); auto bbArg = cast(value); // Function arguments are special. if (bbArg.getOwner() == &forwardOp.getBody().front()) - return getBufferizedFunctionArgType(forwardOp, bbArg.getArgNumber(), - options); + return getBufferizedFunctionArgType(forwardOp, bbArg.getArgNumber(), options); - return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: - getBufferType(op, value, options, invocationStack); + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( + op, value, options, invocationStack); } - LogicalResult verifyAnalysis(Operation *op, - const bufferization::AnalysisState &state) const { - auto forwardOp = cast(op); + LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const + { + auto forwardOp = cast(op); // TODO: func.func with multiple returns are not supported. if (!getAssumedUniqueReturnOp(forwardOp)) - return op->emitOpError("op without unique func.return is not supported"); + return op->emitOpError("op without unique func.return is not supported"); return success(); } @@ -333,8 +334,7 @@ struct ForwardOpInterface for (const auto &it : llvm::enumerate(funcType.getInputs())) { Type argType = it.value(); if (dyn_cast(argType)) { - argTypes.push_back( - getBufferizedFunctionArgType(forwardOp, it.index(), options)); + argTypes.push_back(getBufferizedFunctionArgType(forwardOp, it.index(), options)); continue; } argTypes.push_back(argType); @@ -346,9 +346,8 @@ struct ForwardOpInterface // 1. Bufferize every block. for (Block &block : forwardOp.getBody()) - if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, - options))) - return failure(); + if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, options))) + return failure(); // 2. For each result, keep track of which inplace argument it reuses. SmallVector returnValues; @@ -359,17 +358,16 @@ struct ForwardOpInterface // If not a tensor type just forward it. if (!tensorType) { - returnValues.push_back(returnVal); - continue; + returnValues.push_back(returnVal); + continue; } // Note: If `inferFunctionResultLayout = true`, cast are later folded // away. BaseMemRefType resultType = options.functionArgTypeConverterFn( - tensorType, *options.defaultMemorySpaceFn(tensorType), forwardOp, - options); - Value toMemrefOp = rewriter.create( - loc, resultType, returnVal); + tensorType, *options.defaultMemorySpaceFn(tensorType), forwardOp, options); + Value toMemrefOp = + rewriter.create(loc, resultType, returnVal); returnValues.push_back(toMemrefOp); } @@ -381,8 +379,8 @@ struct ForwardOpInterface }); // 4. Rewrite the FuncOp type to buffer form. 
- forwardOp.setType(FunctionType::get(op->getContext(), argTypes, - ValueRange(returnValues).getTypes())); + forwardOp.setType( + FunctionType::get(op->getContext(), argTypes, ValueRange(returnValues).getTypes())); return success(); } @@ -411,31 +409,32 @@ struct ReverseOpInterface bufferization::AliasingOpOperandList getAliasingOpOperands(Operation *op, Value value, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return getAliasingBranchOpOperands(op, cast(value), state); } - FailureOr - getBufferType(Operation *op, Value value, const bufferization::BufferizationOptions &options, - SmallVector &invocationStack) const { + FailureOr getBufferType(Operation *op, Value value, + const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const + { auto reverseOp = cast(op); auto bbArg = cast(value); // Function arguments are special. if (bbArg.getOwner() == &reverseOp.getBody().front()) - return getBufferizedFunctionArgType(reverseOp, bbArg.getArgNumber(), - options); + return getBufferizedFunctionArgType(reverseOp, bbArg.getArgNumber(), options); - return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel:: - getBufferType(op, value, options, invocationStack); + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( + op, value, options, invocationStack); } - LogicalResult verifyAnalysis(Operation *op, - const bufferization::AnalysisState &state) const { - auto reverseOp = cast(op); + LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const + { + auto reverseOp = cast(op); // TODO: func.func with multiple returns are not supported. if (!getAssumedUniqueReturnOp(reverseOp)) - return op->emitOpError("op without unique func.return is not supported"); + return op->emitOpError("op without unique func.return is not supported"); return success(); } @@ -450,8 +449,7 @@ struct ReverseOpInterface for (const auto &it : llvm::enumerate(funcType.getInputs())) { Type argType = it.value(); if (dyn_cast(argType)) { - argTypes.push_back( - getBufferizedFunctionArgType(reverseOp, it.index(), options)); + argTypes.push_back(getBufferizedFunctionArgType(reverseOp, it.index(), options)); continue; } argTypes.push_back(argType); @@ -463,9 +461,8 @@ struct ReverseOpInterface // 1. Bufferize every block. for (Block &block : reverseOp.getBody()) - if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, - options))) - return failure(); + if (failed(bufferization::bufferizeBlockSignature(&block, rewriter, options))) + return failure(); // 2. For each result, keep track of which inplace argument it reuses. SmallVector returnValues; @@ -476,17 +473,16 @@ struct ReverseOpInterface // If not a tensor type just forward it. if (!tensorType) { - returnValues.push_back(returnVal); - continue; + returnValues.push_back(returnVal); + continue; } // Note: If `inferFunctionResultLayout = true`, cast are later folded // away. BaseMemRefType resultType = options.functionArgTypeConverterFn( - tensorType, *options.defaultMemorySpaceFn(tensorType), reverseOp, - options); - Value toMemrefOp = rewriter.create( - loc, resultType, returnVal); + tensorType, *options.defaultMemorySpaceFn(tensorType), reverseOp, options); + Value toMemrefOp = + rewriter.create(loc, resultType, returnVal); returnValues.push_back(toMemrefOp); } @@ -498,8 +494,8 @@ struct ReverseOpInterface }); // 4. Rewrite the FuncOp type to buffer form. 
-        reverseOp.setType(FunctionType::get(op->getContext(), argTypes,
-                                            ValueRange(returnValues).getTypes()));
+        reverseOp.setType(
+            FunctionType::get(op->getContext(), argTypes, ValueRange(returnValues).getTypes()));
 
         return success();
     }
 };

From d42b123a585e28d6e2bd34f1b3ba132ee7b3f714 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 18 Sep 2024 13:56:17 -0400
Subject: [PATCH 097/183] Update llvm patch with FunctionArgTypeConverterFn

---
 mlir/patches/moduleOp-bufferization.patch | 51 +++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/moduleOp-bufferization.patch
index 683f1d51a3..f66ca1b529 100644
--- a/mlir/patches/moduleOp-bufferization.patch
+++ b/mlir/patches/moduleOp-bufferization.patch
@@ -1,3 +1,24 @@
+diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+index 2fda091e412a..ba28596d1f97 100644
+--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
++++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+@@ -9,6 +9,7 @@
+ #ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_
+ #define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_
+ 
++#include "mlir/Interfaces/FunctionInterfaces.h"
+ #include "mlir/IR/Operation.h"
+ #include "mlir/IR/PatternMatch.h"
+ #include "mlir/Support/LLVM.h"
+@@ -262,7 +263,7 @@ struct BufferizationOptions {
+   /// Parameters: Value, memory space, func op, bufferization options
+   using FunctionArgTypeConverterFn =
+       std::function<BaseMemRefType(TensorType, Attribute memorySpace,
+-                                   func::FuncOp, const BufferizationOptions &)>;
++                                   FunctionOpInterface, const BufferizationOptions &)>;
+   /// Tensor -> MemRef type converter.
+   /// Parameters: Value, memory space, bufferization options
+   using UnknownTypeConverterFn = std::function<BaseMemRefType(
+       Value, Attribute memorySpace, const BufferizationOptions &)>;

From: Tzung-Han Juang
Date: Wed, 18 Sep 2024 15:42:42 -0400
Subject: [PATCH 098/183] Preserve unused return types during bufferization

---
 .../BufferizableOpInterfaceImpl.cpp | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
index 19fbb27a1c..4eb9e3bd52 100644
--- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -378,9 +378,16 @@ struct ForwardOpInterface
             rewriter.replaceOpWithNewOp(returnOp, returnValues, returnOp.getEmpty());
         });
 
-        // 4. Rewrite the FuncOp type to buffer form.
+        // 4. Rewrite the FuncOp type to buffer form. Also preserve unused return types.
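+        // The result types are now rebuilt from the declared result types
+        // rather than from the terminator operands, so results whose uses
+        // were folded away keep their slot in the bufferized signature.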
+ SmallVector returnTypes; + for (auto retTy : reverseOp.getResultTypes()) { + auto tensorType = dyn_cast(retTy); + BaseMemRefType resultType = options.functionArgTypeConverterFn( + tensorType, *options.defaultMemorySpaceFn(tensorType), reverseOp, options); + returnTypes.push_back(resultType); + } reverseOp.setType( - FunctionType::get(op->getContext(), argTypes, ValueRange(returnValues).getTypes())); + FunctionType::get(op->getContext(), argTypes, returnTypes)); return success(); } From de97c6bec5c5deeb606e0de8ba565d05d55cf1c9 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 16:09:39 -0400 Subject: [PATCH 099/183] Use identity-layout-map to prevent unused returns from being removed --- frontend/catalyst/compiler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 2041560145..c20de525ea 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -209,7 +209,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops" + " function-boundary-type-conversion=identity-layout-map}", "canonicalize", "gradient-postprocess", "convert-arraylist-to-memref", From 42b3870aca4b755e086532bdd80ac5f71599d244 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 17:15:21 -0400 Subject: [PATCH 100/183] revert identiy-layout-map --- frontend/catalyst/compiler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index c20de525ea..2041560145 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -209,8 +209,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops" - " function-boundary-type-conversion=identity-layout-map}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}", "canonicalize", "gradient-postprocess", "convert-arraylist-to-memref", From ee02d7e0ba52b72a7341c78528a8f98b324a8859 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 18 Sep 2024 21:48:54 -0400 Subject: [PATCH 101/183] Restore ReserseOp's signature if it is optimized away by one-shot-bufferize --- .../Transforms/PostprocessingPatterns.cpp | 77 ++++++++++++++++++- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index 5c79663f0d..731ad4d5a9 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -14,12 +14,14 @@ #include "iostream" #include "llvm/Support/raw_ostream.h" +#include #include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -41,12 +43,14 @@ struct PostprocessForwardOp : public OpRewritePattern { // 
Check if the numbers of args and returns match Enzyme's format.
         auto argc = op.getArgc();
         auto resc = op.getResc();
-        auto tapeCount = op.getTape();
+        auto tape = op.getTape();
 
-        if (op.getFunctionType().getNumInputs() == (argc + resc) * 2 &&
-            op.getFunctionType().getNumResults() == tapeCount)
+        // If function signature is modified, this pass cannot be processed.
+        if (op.getFunctionType().getNumInputs() != argc ||
+            op.getFunctionType().getNumResults() != (resc + tape))
             return failure();
 
+
         auto argTys = op.getArgumentTypes();
         auto retTys = op.getResultTypes();
         SmallVector bufferArgs;
@@ -127,7 +131,9 @@ struct PostprocessReverseOp : public OpRewritePattern {
         auto forwardResc = op.getResc();
         auto tape = op.getTape();
 
-        if (op.getFunctionType().getNumInputs() == (forwardArgc + forwardResc) * 2 + tape)
+        // If function signature is modified, this pass cannot be processed.
+        if (op.getFunctionType().getNumInputs() != (forwardResc + tape) ||
+            op.getFunctionType().getNumResults() != forwardArgc)
             return failure();
 
         auto argTys = op.getArgumentTypes();
@@ -200,6 +206,68 @@ struct PostprocessReverseOp : public OpRewritePattern {
     }
 };
 
+struct RestoreReverseOp : public OpRewritePattern {
+    using OpRewritePattern::OpRewritePattern;
+
+    mlir::LogicalResult matchAndRewrite(ReverseOp op,
+                                        mlir::PatternRewriter &rewriter) const override
+    {
+        // ReverseOp's output is optimized away by one-shot bufferize.
+        auto forwardArgc = op.getArgc();
+        auto forwardResc = op.getResc();
+        auto tape = op.getTape();
+
+        // Check if the Op is post-processed.
+        if (op.getFunctionType().getNumInputs() == (forwardResc + forwardArgc) * 2 + tape)
+            return failure();
+
+        // If function signature is modified, this pass cannot be processed.
+        if (op.getFunctionType().getNumResults() >= forwardArgc)
+            return failure();
+
+        // Get the parent module.
+        auto module = op->getParentOfType();
+
+        // Get GradOp
+        CustomGradOp gradCaller = nullptr;
+        for (auto gradOp : module.getOps()) {
+            if (gradOp.getReverse() == op.getSymName()) {
+                gradCaller = gradOp;
+            }
+        }
+
+        if (!gradCaller)
+            return failure();
+
+        ForwardOp target = nullptr;
+        // Get the corresponding ForwardOp.
+        for (auto forwardOp : module.getOps()) {
+            if (forwardOp.getSymName() == gradCaller.getForward()) {
+                target = forwardOp;
+            }
+        }
+
+        if (!target)
+            return failure();
+
+        auto forwardArgTys = target.getArgumentTypes();
+        SmallVector noTapeTys;
+        for (size_t i = 0 ; i < forwardArgTys.size(); ++i) {
+            if (i < op.getArgc()) {
+                noTapeTys.push_back(forwardArgTys[i]);
+            }
+        }
+
+        auto reverseTy = rewriter.getFunctionType(op.getArgumentTypes(), noTapeTys);
+
+        rewriter.modifyOpInPlace(op, [&] {
+            op.setFunctionType(reverseTy);
+        });
+
+        return failure();
+    }
+};
+
 } // namespace
 
 namespace catalyst {
 namespace gradient {
@@ -207,6 +275,7 @@ void populatePostprocessingPatterns(RewritePatternSet &patterns)
 {
+    patterns.add(patterns.getContext());
     patterns.add(patterns.getContext());
     patterns.add(patterns.getContext());
 }

From df63f1a46854653759bbd98e7d6793293e29a4d4 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Wed, 18 Sep 2024 22:24:35 -0400
Subject: [PATCH 102/183] Use replaceOpWithNewBufferizedOp for quantum.unitary

---
 .../lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
index 08689c18a9..7f4bb2bfe9 100644
--- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
+++
b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -44,14 +44,11 @@ struct QubitUnitaryOpInterface
         auto toMemrefOp = rewriter.create(loc, memrefType,
                                           qubitUnitaryOp.getMatrix());
         auto memref = toMemrefOp.getResult();
-        auto newQubitUnitaryOp = rewriter.create(
-            loc, qubitUnitaryOp.getOutQubits().getTypes(),
+        bufferization::replaceOpWithNewBufferizedOp(rewriter, op,
+            qubitUnitaryOp.getOutQubits().getTypes(),
             qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, qubitUnitaryOp.getInQubits(),
             qubitUnitaryOp.getAdjointAttr(), qubitUnitaryOp.getInCtrlQubits(),
             qubitUnitaryOp.getInCtrlValues());
-        bufferization::replaceOpWithBufferizedValues(rewriter, op,
-                                                     newQubitUnitaryOp.getOutQubits());
-
         return success();
     }
 };

From 8a1c49ae25ebd1a42da17f59a7859c685789334f Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 19 Sep 2024 10:29:48 -0400
Subject: [PATCH 103/183] Use old deallocation steps

---
 frontend/catalyst/compiler.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index 2041560145..f51f9241b6 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -212,16 +212,13 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}",
         "canonicalize",
         "gradient-postprocess",
+        "func.func(buffer-hoisting)",
+        "func.func(buffer-loop-hoisting)",
+        "func.func(buffer-deallocation)",
         "convert-arraylist-to-memref",
         "convert-bufferization-to-memref",
         "canonicalize",
         "cp-global-memref",
-        # "func.func(buffer-hoisting)",
-        # "func.func(buffer-loop-hoisting)",
-        # "buffer-results-to-out-params",
-        # "drop-equivalent-buffer-results",
-        # "func.func(promote-buffers-to-stack)",
-        # "buffer-deallocation-pipeline",
     ],
 )

From 160a0ed6fa59cbe46d785122bc90ba4bc993ad0b Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 19 Sep 2024 10:50:35 -0400
Subject: [PATCH 104/183] Cleanup

---
 frontend/catalyst/compiler.py                  |  7 +++++--
 .../Transforms/BufferizableOpInterfaceImpl.cpp | 10 ++++------
 .../Transforms/PostprocessingPatterns.cpp      | 15 ++++++---------
 .../Transforms/BufferizableOpInterfaceImpl.cpp |  4 ++--
 4 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py
index f51f9241b6..66687e9693 100644
--- a/frontend/catalyst/compiler.py
+++ b/frontend/catalyst/compiler.py
@@ -210,14 +210,17 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt
         "eliminate-empty-tensors",
         "convert-elementwise-to-linalg",
         "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}",
-        "canonicalize",
+        "canonicalize",  # Remove dead memrefToTensorOp's
         "gradient-postprocess",
+        # introduced during gradient-bufferize of callbacks
         "func.func(buffer-hoisting)",
         "func.func(buffer-loop-hoisting)",
         "func.func(buffer-deallocation)",
         "convert-arraylist-to-memref",
         "convert-bufferization-to-memref",
-        "canonicalize",
+        "canonicalize",  # Must be after convert-bufferization-to-memref
+        # otherwise there are issues in lowering of dynamic tensors.
+ # "cse", "cp-global-memref", ], ) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 4eb9e3bd52..3b620d75a4 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -378,7 +378,7 @@ struct ForwardOpInterface rewriter.replaceOpWithNewOp(returnOp, returnValues, returnOp.getEmpty()); }); - // 4. Rewrite the FuncOp type to buffer form. Also preserve unused return types. + // 4. Rewrite the FuncOp type to buffer form. Also preserve unused return types. SmallVector returnTypes; for (auto retTy : forwardOp.getResultTypes()) { auto tensorType = dyn_cast(retTy); @@ -386,8 +386,7 @@ struct ForwardOpInterface tensorType, *options.defaultMemorySpaceFn(tensorType), forwardOp, options); returnTypes.push_back(resultType); } - forwardOp.setType( - FunctionType::get(op->getContext(), argTypes, returnTypes)); + forwardOp.setType(FunctionType::get(op->getContext(), argTypes, returnTypes)); return success(); } @@ -500,7 +499,7 @@ struct ReverseOpInterface rewriter.replaceOpWithNewOp(returnOp, returnValues, returnOp.getEmpty()); }); - // 4. Rewrite the FuncOp type to buffer form. Also preserve unused return types. + // 4. Rewrite the FuncOp type to buffer form. Also preserve unused return types. SmallVector returnTypes; for (auto retTy : reverseOp.getResultTypes()) { auto tensorType = dyn_cast(retTy); @@ -508,8 +507,7 @@ struct ReverseOpInterface tensorType, *options.defaultMemorySpaceFn(tensorType), reverseOp, options); returnTypes.push_back(resultType); } - reverseOp.setType( - FunctionType::get(op->getContext(), argTypes, returnTypes)); + reverseOp.setType(FunctionType::get(op->getContext(), argTypes, returnTypes)); return success(); } diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index 731ad4d5a9..0f1b8332ba 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -46,11 +46,10 @@ struct PostprocessForwardOp : public OpRewritePattern { auto tape = op.getTape(); // If function signature is modified, this pass cannot be processed. - if (op.getFunctionType().getNumInputs() != argc || - op.getFunctionType().getNumResults() != (resc + tape)) + if (op.getFunctionType().getNumInputs() != argc || + op.getFunctionType().getNumResults() != (resc + tape)) return failure(); - auto argTys = op.getArgumentTypes(); auto retTys = op.getResultTypes(); SmallVector bufferArgs; @@ -132,8 +131,8 @@ struct PostprocessReverseOp : public OpRewritePattern { auto tape = op.getTape(); // If function signature is modified, this pass cannot be processed. 
- if (op.getFunctionType().getNumInputs() != (forwardResc + tape) || - op.getFunctionType().getNumResults() != forwardArgc) + if (op.getFunctionType().getNumInputs() != (forwardResc + tape) || + op.getFunctionType().getNumResults() != forwardArgc) return failure(); auto argTys = op.getArgumentTypes(); @@ -252,7 +251,7 @@ struct RestoreReverseOp : public OpRewritePattern { auto forwardArgTys = target.getArgumentTypes(); SmallVector noTapeTys; - for (size_t i = 0 ; i < forwardArgTys.size(); ++i) { + for (size_t i = 0; i < forwardArgTys.size(); ++i) { if (i < op.getArgc()) { noTapeTys.push_back(forwardArgTys[i]); } @@ -260,9 +259,7 @@ struct RestoreReverseOp : public OpRewritePattern { auto reverseTy = rewriter.getFunctionType(op.getArgumentTypes(), noTapeTys); - rewriter.modifyOpInPlace(op, [&] { - op.setFunctionType(reverseTy); - }); + rewriter.modifyOpInPlace(op, [&] { op.setFunctionType(reverseTy); }); return failure(); } diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 7f4bb2bfe9..d176098b8f 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -44,8 +44,8 @@ struct QubitUnitaryOpInterface auto toMemrefOp = rewriter.create(loc, memrefType, qubitUnitaryOp.getMatrix()); auto memref = toMemrefOp.getResult(); - bufferization::replaceOpWithNewBufferizedOp(rewriter, op, - qubitUnitaryOp.getOutQubits().getTypes(), + bufferization::replaceOpWithNewBufferizedOp( + rewriter, op, qubitUnitaryOp.getOutQubits().getTypes(), qubitUnitaryOp.getOutCtrlQubits().getTypes(), memref, qubitUnitaryOp.getInQubits(), qubitUnitaryOp.getAdjointAttr(), qubitUnitaryOp.getInCtrlQubits(), qubitUnitaryOp.getInCtrlValues()); From 3f6eaf2d3de662fccf96e98cdc790263f53bca66 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 19 Sep 2024 10:57:28 -0400 Subject: [PATCH 105/183] Fix var names in mlir debug test --- frontend/test/pytest/test_debug.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/test/pytest/test_debug.py b/frontend/test/pytest/test_debug.py index 15685e828d..16c7b9ad8d 100644 --- a/frontend/test/pytest/test_debug.py +++ b/frontend/test/pytest/test_debug.py @@ -466,18 +466,18 @@ def f(x: float): ), ( "MLIRToLLVMDialect", - "%5 = llvm.fmul %4, %4 : f64\n", - "%t = llvm.fmul %4, %4 : f64\n" + " %5 = llvm.fmul %t, %4 : f64\n", + "%6 = llvm.fmul %5, %5 : f64\n", + "%t = llvm.fmul %5, %5 : f64\n" + " %6 = llvm.fmul %t, %5 : f64\n", ), ( "llvm_ir", - "%5 = fmul double %4, %4\n", - "%t = fmul double %4, %4\n" + "%5 = fmul double %t, %4\n", + "%6 = fmul double %5, %5\n", + "%t = fmul double %5, %5\n" + "%6 = fmul double %t, %5\n", ), ( "last", - "%5 = fmul double %4, %4\n", - "%t = fmul double %4, %4\n" + "%5 = fmul double %t, %4\n", + "%6 = fmul double %5, %5\n", + "%t = fmul double %5, %5\n" + "%6 = fmul double %t, %5\n", ), ], ) From f42b8611ea5f9f3e6f0f1b4226428d505b18a742 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 19 Sep 2024 12:20:47 -0400 Subject: [PATCH 106/183] Test removing stride from memreftype in customCall --- .../Transforms/BufferizableOpInterfaceImpl.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 15f287f1d5..33bda43f2d 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ 
b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -3,6 +3,8 @@
 #include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Support/LogicalResult.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 #include "Catalyst/IR/CatalystOps.h"
@@ -85,7 +87,16 @@ struct CustomCallOpInterface
             FailureOr opBuffer = getBuffer(rewriter, operand, options);
             if (failed(opBuffer))
                 return failure();
-            bufferArgs.push_back(*opBuffer);
+            MemRefType memrefType = dyn_cast(opBuffer->getType());
+            if (!memrefType)
+                return failure();
+            if (!memrefType.getLayout().isIdentity()) {
+                auto nonStrideType = MemRefType::get(memrefType.getShape(), memrefType.getElementType());
+                auto newMemRef = rewriter.create(op->getLoc(), nonStrideType, *opBuffer);
+                bufferArgs.push_back(newMemRef);
+            } else {
+                bufferArgs.push_back(*opBuffer);
+            }
         }
 
         // Add bufferized return values to the arguments

From 3c4f0ca8889497f7dcdd9bbbf8bb41b7dece910b Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Thu, 19 Sep 2024 20:08:31 -0400
Subject: [PATCH 107/183] Use SubView for allocCopyMemrefDyn if CastOp is not
 supported

---
 mlir/lib/Quantum/Transforms/cp_global_buffers.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
index 227d7977b0..b772d378cd 100644
--- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
+++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
@@ -106,6 +106,12 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter)
     Value newMemRef = rewriter.create(loc, memrefType, dynDims);
 
     // Cast memrefType back to maintain memory layout.
+    if (!memref::CastOp::areCastCompatible(memrefType, origMemrefType)) {
+        auto subview = rewriter.create(loc, origMemrefType, memref);
+        rewriter.create(loc, memref, subview);
+        return subview;
+    }
+
     Value castMemRef = rewriter.create(loc, origMemrefType, newMemRef);
     rewriter.create(loc, memref, newMemRef);
     return castMemRef;

From 0eb6cbf34d192f4c991a12ea8cbc402b4021c100 Mon Sep 17 00:00:00 2001
From: Tzung-Han Juang
Date: Fri, 20 Sep 2024 11:12:17 -0400
Subject: [PATCH 108/183] Use subview to handle memref with strides and offset

---
 .../Quantum/Transforms/cp_global_buffers.cpp  | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
index b772d378cd..4f0d90a8a1 100644
--- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
+++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp
@@ -103,12 +103,33 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter)
             ndim++;
         }
     }
-
+    
     Value newMemRef = rewriter.create(loc, memrefType, dynDims);
 
     // Cast memrefType back to maintain memory layout.
- if (!memref::CastOp::areCastCompatible(memrefType, origMemrefType)) { - auto subview = rewriter.create(loc, origMemrefType, memref); - rewriter.create(loc, memref, subview); + //if (!memref::CastOp::areCastCompatible(memrefType, origMemrefType)) { + if (!origMemrefType.getLayout().isIdentity()) { + SmallVector sizes = + memref::getMixedSizes(rewriter, loc, newMemRef); + + // getlayout + auto layout = origMemrefType.getLayout(); + auto stridedAttr = cast(layout); + auto offset = stridedAttr.getOffset(); + auto strideArr = stridedAttr.getStrides(); + + // Rebuild strides and offsets info + SmallVector strides; + for (auto stride : strideArr) { + strides.push_back(rewriter.getIndexAttr(stride)); + } + SmallVector offsets(origMemrefType.getRank(), + rewriter.getIndexAttr(0)); + offsets[0] = rewriter.getIndexAttr(offset); + auto subview = rewriter.create( + loc, origMemrefType, newMemRef, + offsets, sizes, strides); + subview.print(llvm::outs()); + rewriter.create(loc, memref, newMemRef); return subview; } From d7a0019af47cd0ec3cedb6e294a19530b445aa37 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 20 Sep 2024 13:00:02 -0400 Subject: [PATCH 109/183] Use identity map and cleanup --- frontend/catalyst/compiler.py | 2 +- mlir/lib/Quantum/Transforms/cp_global_buffers.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 66687e9693..32080e23ed 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -209,7 +209,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops function-boundary-type-conversion=identity-layout-map}", "canonicalize", # Remove dead memrefToTensorOp's "gradient-postprocess", # introduced during gradient-bufferize of callbacks diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp index 4f0d90a8a1..e622604e07 100644 --- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp +++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp @@ -89,13 +89,13 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) { auto origMemrefType = cast(memref.getType()); // Rebuild MemRefType without memory layout. - auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); + auto newMemrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); llvm::SmallVector dynDims; { llvm::SmallVector dynIndices; int64_t ndim = 0; - for (auto dim : memrefType.getShape()) { + for (auto dim : newMemrefType.getShape()) { if (dim < 0) { Value dynValue = rewriter.create(loc, memref, ndim); dynDims.push_back(dynValue); @@ -104,10 +104,10 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) } } - Value newMemRef = rewriter.create(loc, memrefType, dynDims); + Value newMemRef = rewriter.create(loc, newMemrefType, dynDims); // Cast memrefType back to maintain memory layout. 
- //if (!memref::CastOp::areCastCompatible(memrefType, origMemrefType)) { - if (!origMemrefType.getLayout().isIdentity()) { + if (!memref::CastOp::areCastCompatible(newMemrefType, origMemrefType)) { + //if (!origMemrefType.getLayout().isIdentity()) { SmallVector sizes = memref::getMixedSizes(rewriter, loc, newMemRef); @@ -128,7 +128,7 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) auto subview = rewriter.create( loc, origMemrefType, newMemRef, offsets, sizes, strides); - subview.print(llvm::outs()); + rewriter.create(loc, memref, newMemRef); return subview; } From 0a915b510709afb2bf84f32deb43074159c250df Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 20 Sep 2024 14:33:27 -0400 Subject: [PATCH 110/183] Add CallOp bufferization patch --- mlir/Makefile | 4 ++ mlir/patches/callOp-bufferization.patch | 71 +++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 mlir/patches/callOp-bufferization.patch diff --git a/mlir/Makefile b/mlir/Makefile index debedd0c3a..d966b0a96f 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -14,6 +14,7 @@ ENABLE_ASAN?=OFF BUILD_TYPE?=Release LLVM_ROOT=$(MK_DIR)/llvm-project LLVM_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch +LLVM_FUNC_CALL_PATCH_FILE=$(MK_DIR)/patches/callOp-bufferization.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -59,6 +60,9 @@ llvm: @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); \ fi + @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); \ + fi cmake -G Ninja -S llvm-project/llvm -B $(LLVM_BUILD_DIR) \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DLLVM_BUILD_EXAMPLES=OFF \ diff --git a/mlir/patches/callOp-bufferization.patch b/mlir/patches/callOp-bufferization.patch new file mode 100644 index 0000000000..2b7180fdd7 --- /dev/null +++ b/mlir/patches/callOp-bufferization.patch @@ -0,0 +1,71 @@ +diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +index 053ea7935260a2..9fbe574ec392dc 100644 +--- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp ++++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +@@ -258,20 +258,23 @@ struct CallOpInterface + return failure(); + Value buffer = *maybeBuffer; + +- // Caller / callee type mismatch is handled with a CastOp. ++ // Caller / callee type mismatch is handled with castOrReallocMemRefValue. + auto memRefType = funcType.getInput(opOperand.getOperandNumber()); + // Since we don't yet have a clear layout story, to_memref may + // conservatively turn tensors into more dynamic memref than necessary. + // If the memref type of the callee fails, introduce an extra memref.cast + // that will either canonicalize away or fail compilation until we can do +- // something better. ++ // something better. Insert a reallocation + copy if it cannot be ++ // statically guaranteed that a direct cast would be valid. 
+ if (buffer.getType() != memRefType) { +- assert( +- memref::CastOp::areCastCompatible(buffer.getType(), memRefType) && +- "CallOp::bufferize: cast incompatible"); +- Value castBuffer = rewriter.create(callOp.getLoc(), +- memRefType, buffer); +- buffer = castBuffer; ++ auto memrefDstType = dyn_cast(memRefType); ++ assert(memrefDstType && ++ "buffer layout not supported on unranked tensors"); ++ FailureOr replacement = bufferization::castOrReallocMemRefValue( ++ rewriter, buffer, memrefDstType, options); ++ if (failed(replacement)) ++ return failure(); ++ buffer = *replacement; + } + newOperands.push_back(buffer); + } +diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +index 0248afb11f1672..0d5224514e3a02 100644 +--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir ++++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +@@ -71,6 +71,30 @@ func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) + + // ----- + ++// CHECK-NO-LAYOUT-MAP-LABEL: func.func @foo( ++// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<3x8xf16>) -> memref<3x8xf16> { ++// CHECK-NO-LAYOUT-MAP: return %[[VAL_0]] : memref<3x8xf16> ++// CHECK-NO-LAYOUT-MAP: } ++func.func @foo(%arg0: tensor<3x8xf16>) -> tensor<3x8xf16> { ++ return %arg0 : tensor<3x8xf16> ++} ++ ++// CHECK-NO-LAYOUT-MAP-LABEL: func.func @call_extract_slice( ++// CHECK-NO-LAYOUT-MAP-SAME: %[[VAL_0:.*]]: memref<4x8xf16>) -> memref<3x8xf16> { ++// CHECK-NO-LAYOUT-MAP: %[[VAL_1:.*]] = memref.subview %[[VAL_0]][1, 0] [3, 8] [1, 1] : memref<4x8xf16> to memref<3x8xf16, strided<[8, 1], offset: 8>> ++// CHECK-NO-LAYOUT-MAP: %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<3x8xf16> ++// CHECK-NO-LAYOUT-MAP: memref.copy %[[VAL_1]], %[[VAL_2]] : memref<3x8xf16, strided<[8, 1], offset: 8>> to memref<3x8xf16> ++// CHECK-NO-LAYOUT-MAP: %[[VAL_3:.*]] = call @foo(%[[VAL_2]]) : (memref<3x8xf16>) -> memref<3x8xf16> ++// CHECK-NO-LAYOUT-MAP: return %[[VAL_3]] : memref<3x8xf16> ++// CHECK-NO-LAYOUT-MAP: } ++func.func @call_extract_slice(%arg0: tensor<4x8xf16>) -> (tensor<3x8xf16>) { ++ %0 = tensor.extract_slice %arg0[1, 0] [3, 8] [1, 1] : tensor<4x8xf16> to tensor<3x8xf16> ++ %1 = call @foo(%0) : (tensor<3x8xf16>) -> tensor<3x8xf16> ++ return %1 : tensor<3x8xf16> ++} ++ ++// ----- ++ + // CHECK-LABEL: func private @private_func + // CHECK-NO-LAYOUT-MAP-LABEL: func private @private_func(memref) -> f32 + func.func private @private_func(tensor) -> (f32) From 52fd238b65e7bb5f5a365dec45bf2d3c2d87f000 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 20 Sep 2024 14:39:09 -0400 Subject: [PATCH 111/183] Revert changes in frontend/test/pytest/test_debug.py --- frontend/test/pytest/test_debug.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/test/pytest/test_debug.py b/frontend/test/pytest/test_debug.py index 16c7b9ad8d..15685e828d 100644 --- a/frontend/test/pytest/test_debug.py +++ b/frontend/test/pytest/test_debug.py @@ -466,18 +466,18 @@ def f(x: float): ), ( "MLIRToLLVMDialect", - "%6 = llvm.fmul %5, %5 : f64\n", - "%t = llvm.fmul %5, %5 : f64\n" + " %6 = llvm.fmul %t, %5 : f64\n", + "%5 = llvm.fmul %4, %4 : f64\n", + "%t = llvm.fmul %4, %4 : f64\n" + " %5 = llvm.fmul %t, %4 : f64\n", ), ( "llvm_ir", - "%6 = fmul double %5, %5\n", - "%t = fmul double %5, %5\n" + "%6 = fmul double %t, %5\n", + "%5 = fmul double %4, %4\n", + "%t = fmul 
double %4, %4\n" + "%5 = fmul double %t, %4\n", ), ( "last", - "%6 = fmul double %5, %5\n", - "%t = fmul double %5, %5\n" + "%6 = fmul double %t, %5\n", + "%5 = fmul double %4, %4\n", + "%t = fmul double %4, %4\n" + "%5 = fmul double %t, %4\n", ), ], ) From 18b27628bd468f4e62de72713146c2831c09037c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 20 Sep 2024 15:50:58 -0400 Subject: [PATCH 112/183] Add inline back to bufferization --- frontend/catalyst/compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 32080e23ed..b92122f403 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -206,6 +206,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt BUFFERIZATION_PASS = ( "BufferizationPass", [ + "inline", "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", From 266ba2447f4b9998705c2f8dcc3b1204bde480b6 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 10:44:59 -0400 Subject: [PATCH 113/183] Add functionOPInterface patch for mhlo --- mlir/Makefile | 5 +++++ mlir/patches/moduleOp-mhlo.patch | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 mlir/patches/moduleOp-mhlo.patch diff --git a/mlir/Makefile b/mlir/Makefile index d966b0a96f..4d0e7de3d8 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -15,6 +15,8 @@ BUILD_TYPE?=Release LLVM_ROOT=$(MK_DIR)/llvm-project LLVM_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch LLVM_FUNC_CALL_PATCH_FILE=$(MK_DIR)/patches/callOp-bufferization.patch +MHLO_ROOT?=$(MK_DIR)/mlir-hlo +MHLO_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-mhlo.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -94,6 +96,9 @@ mhlo: @if patch --dry-run -p1 -N $(TARGET_FILE) $(PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 $(TARGET_FILE) $(PATCH_FILE); \ fi + @if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); \ + fi cmake -G Ninja -S mlir-hlo -B $(MHLO_BUILD_DIR) \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DLLVM_ENABLE_ASSERTIONS=ON \ diff --git a/mlir/patches/moduleOp-mhlo.patch b/mlir/patches/moduleOp-mhlo.patch new file mode 100644 index 0000000000..74fe5f3dd9 --- /dev/null +++ b/mlir/patches/moduleOp-mhlo.patch @@ -0,0 +1,21 @@ +diff --git a/transforms/bufferize_pass.cc b/transforms/bufferize_pass.cc +index 1e810cff2..c91c49710 100644 +--- a/transforms/bufferize_pass.cc ++++ b/transforms/bufferize_pass.cc +@@ -66,6 +66,7 @@ limitations under the License. + #include "mlir/IR/Operation.h" + #include "mlir/IR/PatternMatch.h" + #include "mlir/IR/Visitors.h" ++#include "mlir/Interfaces/FunctionInterfaces.h" + #include "mlir/Support/LLVM.h" + #include "mlir/Transforms/DialectConversion.h" + #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +@@ -235,7 +236,7 @@ struct OneShotBufferizePass + opts.allowReturnAllocsFromLoops = true; + opts.bufferizeFunctionBoundaries = true; + opts.functionArgTypeConverterFn = +- [=](TensorType tensorType, Attribute memorySpace, func::FuncOp funcOp, ++ [=](TensorType tensorType, Attribute memorySpace, FunctionOpInterface funcOp, + const bufferization::BufferizationOptions& options) { + // Functions created by fusion outlining should have fully dynamic + // layout. 
All other functions (for now only "main") gets static From 9d5fc4d892322d333f1829e86d7e9fed69966c22 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 20:49:32 -0400 Subject: [PATCH 114/183] Correct bufferizeToMemoryWrite for setState and setBasisState --- mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index d176098b8f..fb3c0c64c7 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -312,7 +312,7 @@ struct SetStateOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return true; + return false; } bufferization::AliasingValueList @@ -353,7 +353,7 @@ struct SetBasisStateOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return true; + return false; } bufferization::AliasingValueList From 0e10abb811147e257e939d40474d038fc0df7711 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 21:38:55 -0400 Subject: [PATCH 115/183] Update linux-x86_64 wheel --- .github/workflows/build-wheel-linux-x86_64.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 109886598f..87bc8bd6d1 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -172,6 +172,11 @@ jobs: if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH + export LLVM_ROOT=mlir/llvm-project + export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch + export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ @@ -208,7 +213,10 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch + export MHLO_ROOT?=mlir/mlir-hlo + export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi + if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ From a1dd5d5c57ff4f47130285491cb03c2b4f445e47 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 21:40:50 -0400 Subject: [PATCH 116/183] Update macos-arm64 wheel --- .github/workflows/build-wheel-linux-x86_64.yaml | 2 ++ .github/workflows/build-wheel-macos-arm64.yaml | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml 
b/.github/workflows/build-wheel-linux-x86_64.yaml index 87bc8bd6d1..9854315428 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -172,11 +172,13 @@ jobs: if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH + export LLVM_ROOT=mlir/llvm-project export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index 2fd6220c61..c8b0864fb3 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -137,6 +137,11 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + export LLVM_ROOT=mlir/llvm-project + export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch + export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ @@ -172,7 +177,10 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch + export MHLO_ROOT?=mlir/mlir-hlo + export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi + if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ From 4718c149a17f7c5a4b14bd1efa8b98cc168c44e2 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 21:42:40 -0400 Subject: [PATCH 117/183] Update macos-x86-64 wheel --- .github/workflows/build-wheel-macos-arm64.yaml | 1 + .github/workflows/build-wheel-macos-x86_64.yaml | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index c8b0864fb3..5808bb58a7 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -142,6 +142,7 @@ jobs: export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi if patch 
--dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index f03f324617..df18030527 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -133,6 +133,12 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + export LLVM_ROOT=mlir/llvm-project + export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch + export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ @@ -168,7 +174,10 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch + export MHLO_ROOT?=mlir/mlir-hlo + export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi + if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ From 64aa9ffb63e4dd45dd7ff5db960d9a268d755076 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 21:45:05 -0400 Subject: [PATCH 118/183] Update linux_arm64 scripts --- .github/workflows/scripts/linux_arm64/rh8/build_llvm.sh | 6 ++++++ .github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh | 3 +++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh index 78bb6aadb8..70c3d21a7a 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh @@ -33,6 +33,12 @@ export PATH=/opt/_internal/cpython-${PYTHON_VERSION}.${PYTHON_SUBVERSION}/bin:/o # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja +export LLVM_ROOT=mlir/llvm-project +export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch +export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch +if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi +if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + # Build LLVM cmake -S /catalyst/mlir/llvm-project/llvm -B /catalyst/llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh 
b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh index 2a5b2e4fa7..b596d1f1c0 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh @@ -38,7 +38,10 @@ sed -i -e 's/LINK_LIBS PUBLIC/LINK_LIBS PUBLIC MLIRDeallocationUtils/g' mlir/mli export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch +export MHLO_ROOT?=mlir/mlir-hlo +export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi +if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi # Build MHLO cmake -S /catalyst/mlir/mlir-hlo -B /catalyst/mhlo-build -G Ninja \ From cf56867fabbe996371567c70fcdcac5befc65013 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 22:05:57 -0400 Subject: [PATCH 119/183] add lcurses flag to compile_executable --- frontend/catalyst/debug/compiler_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/catalyst/debug/compiler_functions.py b/frontend/catalyst/debug/compiler_functions.py index ff3f616f1f..5df5de5c9f 100644 --- a/frontend/catalyst/debug/compiler_functions.py +++ b/frontend/catalyst/debug/compiler_functions.py @@ -335,6 +335,7 @@ def f(x): f"-Wl,-rpath,{python_lib_dir_path}", f"-L{python_lib_dir_path}", "-lpython" + version_str, + "-lcurses" ] # Linker in macOS might use @rpath/Python3.framework/Versions/3.x/Python3. From 6862700cebcb6d1f4c9203ec78ad09f49955aefb Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 23 Sep 2024 23:27:17 -0400 Subject: [PATCH 120/183] Add copy-before-write only for async tests --- frontend/catalyst/compiler.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 8b9202d7d5..babac7a5cc 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -235,6 +235,28 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt ], ) +BUFFERIZATION_ASYNC_PASS = ( + "BufferizationPass", + [ + "inline", + "gradient-preprocess", + "eliminate-empty-tensors", + "convert-elementwise-to-linalg", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops function-boundary-type-conversion=identity-layout-map copy-before-write}", + "canonicalize", # Remove dead memrefToTensorOp's + "gradient-postprocess", + # introduced during gradient-bufferize of callbacks + "func.func(buffer-hoisting)", + "func.func(buffer-loop-hoisting)", + "func.func(buffer-deallocation)", + "convert-arraylist-to-memref", + "convert-bufferization-to-memref", + "canonicalize", # Must be after convert-bufferization-to-memref + # otherwise there are issues in lowering of dynamic tensors. 
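# [Editor's note, annotation rather than diff content: relative to the plain
# BUFFERIZATION_PASS pipeline above, the only functional change in this list is
# the copy-before-write option on one-shot-bufferize, which skips the in-place
# analysis and inserts a buffer copy before every write; presumably this trades
# extra allocations for safety when the async runtime overlaps tape execution.]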
+ # "cse", + "cp-global-memref", + ], +) MLIR_TO_LLVM_PASS = ( "MLIRToLLVMDialect", @@ -289,8 +311,6 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt HLO_LOWERING_PASS, QUANTUM_COMPILATION_PASS, BUFFERIZATION_PASS, - # BUFFERIZATION_PASS2, - # BUFFERIZATION_PASS3, MLIR_TO_LLVM_PASS, ] @@ -306,9 +326,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt TAPE_SPLITTING_PASS, HLO_LOWERING_PASS, QUANTUM_COMPILATION_PASS, - BUFFERIZATION_PASS, - # BUFFERIZATION_PASS2, - # BUFFERIZATION_PASS3, + BUFFERIZATION_ASYNC_PASS, MLIR_TO_LLVM_ASYNC_PASS, ] From ee47a0c7caa0472979fcb821744b18d95d262b0a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 10:13:42 -0400 Subject: [PATCH 121/183] Clear subview work around in cg_global_bufferize --- .../Quantum/Transforms/cp_global_buffers.cpp | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp index e622604e07..d1cba0993d 100644 --- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp +++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp @@ -106,33 +106,6 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) Value newMemRef = rewriter.create(loc, newMemrefType, dynDims); // Cast memrefType back to maintain memory layout. - if (!memref::CastOp::areCastCompatible(newMemrefType, origMemrefType)) { - //if (!origMemrefType.getLayout().isIdentity()) { - SmallVector sizes = - memref::getMixedSizes(rewriter, loc, newMemRef); - - // getlayout - auto layout = origMemrefType.getLayout(); - auto stridedAttr = cast(layout); - auto offset = stridedAttr.getOffset(); - auto strideArr = stridedAttr.getStrides(); - - // Rebuild strides and offsets info - SmallVector strides; - for (auto stride : strideArr) { - strides.push_back(rewriter.getIndexAttr(stride)); - } - SmallVector offsets(origMemrefType.getRank(), - rewriter.getIndexAttr(0)); - offsets[0] = rewriter.getIndexAttr(offset); - auto subview = rewriter.create( - loc, origMemrefType, newMemRef, - offsets, sizes, strides); - - rewriter.create(loc, memref, newMemRef); - return subview; - } - Value castMemRef = rewriter.create(loc, origMemrefType, newMemRef); rewriter.create(loc, memref, newMemRef); return castMemRef; From b858cde601795640d2895e46236d5ecee0202b03 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 10:23:50 -0400 Subject: [PATCH 122/183] Reformatting --- frontend/catalyst/compiler.py | 6 ++++-- frontend/catalyst/debug/compiler_functions.py | 2 +- .../Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 9 ++++++--- mlir/lib/Quantum/Transforms/cp_global_buffers.cpp | 5 +++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index b3ba3c3ae4..ba42c376c1 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -221,7 +221,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops function-boundary-type-conversion=identity-layout-map}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops " + "function-boundary-type-conversion=identity-layout-map}", "canonicalize", # Remove dead memrefToTensorOp's "gradient-postprocess", # introduced during 
gradient-bufferize of callbacks @@ -244,7 +245,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops function-boundary-type-conversion=identity-layout-map copy-before-write}", + "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops " + "function-boundary-type-conversion=identity-layout-map copy-before-write}", "canonicalize", # Remove dead memrefToTensorOp's "gradient-postprocess", # introduced during gradient-bufferize of callbacks diff --git a/frontend/catalyst/debug/compiler_functions.py b/frontend/catalyst/debug/compiler_functions.py index 5df5de5c9f..1f65e90445 100644 --- a/frontend/catalyst/debug/compiler_functions.py +++ b/frontend/catalyst/debug/compiler_functions.py @@ -335,7 +335,7 @@ def f(x): f"-Wl,-rpath,{python_lib_dir_path}", f"-L{python_lib_dir_path}", "-lpython" + version_str, - "-lcurses" + "-lcurses", ] # Linker in macOS might use @rpath/Python3.framework/Versions/3.x/Python3. diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 33bda43f2d..c22ef85d03 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -91,10 +91,13 @@ struct CustomCallOpInterface if (!memrefType) return failure(); if (!memrefType.getLayout().isIdentity()) { - auto nonStrideType = MemRefType::get(memrefType.getShape(), memrefType.getElementType()); - auto newMemRef = rewriter.create(op->getLoc(), nonStrideType, *opBuffer); + auto nonStrideType = + MemRefType::get(memrefType.getShape(), memrefType.getElementType()); + auto newMemRef = + rewriter.create(op->getLoc(), nonStrideType, *opBuffer); bufferArgs.push_back(newMemRef); - } else { + } + else { bufferArgs.push_back(*opBuffer); } } diff --git a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp index d1cba0993d..160adb70d6 100644 --- a/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp +++ b/mlir/lib/Quantum/Transforms/cp_global_buffers.cpp @@ -89,7 +89,8 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) { auto origMemrefType = cast(memref.getType()); // Rebuild MemRefType without memory layout. - auto newMemrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); + auto newMemrefType = + MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); llvm::SmallVector dynDims; { @@ -103,7 +104,7 @@ Value allocCopyMemrefDyn(Location loc, Value memref, PatternRewriter &rewriter) ndim++; } } - + Value newMemRef = rewriter.create(loc, newMemrefType, dynDims); // Cast memrefType back to maintain memory layout. 
Value castMemRef = rewriter.create(loc, origMemrefType, newMemRef); From e8eb51c2028e6c548c0b30d554b686ef691a4720 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 14:20:32 -0400 Subject: [PATCH 123/183] Try fixing wheel --- .github/workflows/build-wheel-linux-x86_64.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 9854315428..783a01351a 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -172,7 +172,6 @@ jobs: if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH - export LLVM_ROOT=mlir/llvm-project export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch @@ -212,7 +211,6 @@ jobs: # building with LLD is a strong requirement for mhlo run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH - export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch export MHLO_ROOT?=mlir/mlir-hlo From da6888314291aeb9eee467bfde0a70ae117fe46c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 15:11:52 -0400 Subject: [PATCH 124/183] Debug wheel --- .github/workflows/build-wheel-linux-x86_64.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 783a01351a..1e6c231584 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -161,6 +161,11 @@ jobs: - name: Build LLD if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + export LLVM_ROOT=mlir/llvm-project + export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch + export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_TARGETS_TO_BUILD="host" \ From f0364efffa885e8ee906de73066505b91e9cea5e Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 15:17:27 -0400 Subject: [PATCH 125/183] Try getting rid of exports --- .github/workflows/build-wheel-linux-x86_64.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 1e6c231584..7ceedaff47 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -161,11 +161,8 @@ jobs: - name: Build LLD if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - export LLVM_ROOT=mlir/llvm-project - export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch - export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 
--directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_TARGETS_TO_BUILD="host" \ From 01555851a1b405d5d9d98194cff2ff10c792092f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 15:22:28 -0400 Subject: [PATCH 126/183] Debug llvm path --- .github/workflows/build-wheel-linux-x86_64.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 7ceedaff47..b625e0391d 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -161,6 +161,7 @@ jobs: - name: Build LLD if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + pwd if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ From 08b44939fcd09e37883a6ab4cec872b9151d5f2c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 15:28:50 -0400 Subject: [PATCH 127/183] Try different paths --- .github/workflows/build-wheel-linux-x86_64.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index b625e0391d..3b7657d1b3 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -161,9 +161,9 @@ jobs: - name: Build LLD if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - pwd - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/moduleOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/llvm-project/patches/callOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + cmake -S 
mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_TARGETS_TO_BUILD="host" \ @@ -175,9 +175,10 @@ jobs: if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH + export LLVM_ROOT=mlir/llvm-project - export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch - export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch + export LLVM_MODULE_PATCH_FILE=mlir/patches/moduleOp-bufferization.patch + export LLVM_FUNC_CALL_PATCH_FILE=mlir/patches/callOp-bufferization.patch if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi From a6281e01533d6c4956d4bd9c33a4e7717ad02258 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 24 Sep 2024 15:39:28 -0400 Subject: [PATCH 128/183] Update patch paths in wheel scripts --- .github/workflows/build-wheel-linux-x86_64.yaml | 16 ++++------------ .github/workflows/build-wheel-macos-arm64.yaml | 13 ++++--------- .github/workflows/build-wheel-macos-x86_64.yaml | 11 +++-------- .../scripts/linux_arm64/rh8/build_llvm.sh | 7 ++----- .../scripts/linux_arm64/rh8/build_mhlo.sh | 4 +--- 5 files changed, 14 insertions(+), 37 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 3b7657d1b3..6a42ab3e00 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -161,9 +161,6 @@ jobs: - name: Build LLD if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_TARGETS_TO_BUILD="host" \ @@ -176,12 +173,9 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH - export LLVM_ROOT=mlir/llvm-project - export LLVM_MODULE_PATCH_FILE=mlir/patches/moduleOp-bufferization.patch - export LLVM_FUNC_CALL_PATCH_FILE=mlir/patches/callOp-bufferization.patch - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi - + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ 
-DLLVM_BUILD_EXAMPLES=OFF \ @@ -217,10 +211,8 @@ jobs: export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch - export MHLO_ROOT?=mlir/mlir-hlo - export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index 5808bb58a7..d7dfa80c28 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -137,12 +137,9 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - export LLVM_ROOT=mlir/llvm-project - export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch - export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi - + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_EXAMPLES=OFF \ @@ -178,10 +175,8 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch - export MHLO_ROOT?=mlir/mlir-hlo - export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index df18030527..6dc80356a7 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -133,11 +133,8 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: 
| - export LLVM_ROOT=mlir/llvm-project - export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch - export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi - if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ @@ -174,10 +171,8 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch - export MHLO_ROOT?=mlir/mlir-hlo - export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh index 70c3d21a7a..6323558df1 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh @@ -33,11 +33,8 @@ export PATH=/opt/_internal/cpython-${PYTHON_VERSION}.${PYTHON_SUBVERSION}/bin:/o # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja -export LLVM_ROOT=mlir/llvm-project -export LLVM_MODULE_PATCH_FILE=mlir/llvm-project/patches/moduleOp-bufferization.patch -export LLVM_FUNC_CALL_PATCH_FILE=mlir/llvm-project/patches/callOp-bufferization.patch -if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); fi -if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); fi +if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/moduleOp-bufferization.patch; fi +if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch; fi # Build LLVM cmake -S /catalyst/mlir/llvm-project/llvm -B /catalyst/llvm-build -G Ninja \ diff --git 
a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh index b596d1f1c0..a98ca20787 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh @@ -38,10 +38,8 @@ sed -i -e 's/LINK_LIBS PUBLIC/LINK_LIBS PUBLIC MLIRDeallocationUtils/g' mlir/mli export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch -export MHLO_ROOT?=mlir/mlir-hlo -export MHLO_MODULE_PATCH_FILE=mlir/patches/moduleOp-mhlo.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi -if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); fi +if patch --dry-run -p1 -N --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/moduleOp-mhlo.patch; fi # Build MHLO cmake -S /catalyst/mlir/mlir-hlo -B /catalyst/mhlo-build -G Ninja \ From df1c23fad87ef1b784b36c23effe312b903ddced Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 10:52:55 -0400 Subject: [PATCH 129/183] Apply patch when getting LLVM and MHLO sources --- .github/workflows/build-wheel-linux-x86_64.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 6a42ab3e00..45ffdd1039 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -294,6 +294,9 @@ jobs: key: llvm-${{ needs.constants.outputs.llvm_version }}-container-source enableCrossOsArchive: True fail-on-cache-miss: True + run: | + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - name: Get Cached LLVM Build id: cache-llvm-build @@ -311,6 +314,11 @@ jobs: key: mhlo-${{ needs.constants.outputs.mhlo_version }}-container-source enableCrossOsArchive: True fail-on-cache-miss: True + run: | + export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt + export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch + if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi - name: Get Cached MHLO Build id: cache-mhlo-build From b5b387678ebc3933a9f22331aad86c2d4c7c0084 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 11:02:51 -0400 Subject: [PATCH 130/183] Revert "Apply patch when getting LLVM and MHLO sources" This reverts commit df1c23fad87ef1b784b36c23effe312b903ddced. 
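[Editor's note on the revert above: the cached-source entries in these
workflows are "uses: actions/cache/restore" steps, and a GitHub Actions step
may specify either "uses:" or "run:", never both, so the patch commands added
in PATCH 129 made the workflow invalid; the next two patches relocate them into
ordinary run steps instead. Since the same apply-once idiom recurs throughout
the series, a minimal standalone sketch may help. "apply_once" is a
hypothetical helper name for illustration, not code from this repository:

    # Probe with --dry-run first; -N rejects hunks that are already applied,
    # so re-running against a cached, already-patched tree is a no-op.
    apply_once() {
        local dir="$1" patch_file="$2"
        if patch --dry-run -p1 -N --directory="$dir" < "$patch_file" > /dev/null 2>&1; then
            patch -p1 --directory="$dir" < "$patch_file"
        fi
    }
    apply_once mlir/llvm-project mlir/patches/moduleOp-bufferization.patch]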
--- .github/workflows/build-wheel-linux-x86_64.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 45ffdd1039..6a42ab3e00 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -294,9 +294,6 @@ jobs: key: llvm-${{ needs.constants.outputs.llvm_version }}-container-source enableCrossOsArchive: True fail-on-cache-miss: True - run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - name: Get Cached LLVM Build id: cache-llvm-build @@ -314,11 +311,6 @@ jobs: key: mhlo-${{ needs.constants.outputs.mhlo_version }}-container-source enableCrossOsArchive: True fail-on-cache-miss: True - run: | - export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt - export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch - if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi - name: Get Cached MHLO Build id: cache-mhlo-build From 0e0406bfecba1dee6e4df55c03d75596df414467 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 11:06:18 -0400 Subject: [PATCH 131/183] Patch LLVM again before building wheel --- .github/workflows/build-wheel-linux-x86_64.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 6a42ab3e00..ee7a2777ba 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -387,6 +387,9 @@ jobs: - name: Build wheel run: | + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + PYTHON=python${{ matrix.python_version }} \ LLVM_BUILD_DIR=$GITHUB_WORKSPACE/llvm-build \ MHLO_BUILD_DIR=$GITHUB_WORKSPACE/mhlo-build \ From 98fb3db5f281119aacce132a0d37202e5af5f217 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 11:17:32 -0400 Subject: [PATCH 132/183] Apply patches right before building MLIR Dialects --- .github/workflows/build-wheel-linux-x86_64.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index ee7a2777ba..df8b19586b 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -368,6 +368,9 @@ jobs: - name: Build MLIR Dialects run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH + if patch --dry-run -p1 -N --directory=mlir/llvm-project < 
mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ @@ -387,9 +390,6 @@ jobs: - name: Build wheel run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - PYTHON=python${{ matrix.python_version }} \ LLVM_BUILD_DIR=$GITHUB_WORKSPACE/llvm-build \ MHLO_BUILD_DIR=$GITHUB_WORKSPACE/mhlo-build \ From 9401b9774c8fab1321668a71d46cc0bec3e15b26 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 11:49:29 -0400 Subject: [PATCH 133/183] Construct BUFFERIZATION_ASYNC_PASS in a concise way --- frontend/catalyst/compiler.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index ba42c376c1..be1d8f8d38 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -241,24 +241,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt BUFFERIZATION_ASYNC_PASS = ( "BufferizationPass", [ - "inline", - "gradient-preprocess", - "eliminate-empty-tensors", - "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops " - "function-boundary-type-conversion=identity-layout-map copy-before-write}", - "canonicalize", # Remove dead memrefToTensorOp's - "gradient-postprocess", - # introduced during gradient-bufferize of callbacks - "func.func(buffer-hoisting)", - "func.func(buffer-loop-hoisting)", - "func.func(buffer-deallocation)", - "convert-arraylist-to-memref", - "convert-bufferization-to-memref", - "canonicalize", # Must be after convert-bufferization-to-memref - # otherwise there are issues in lowering of dynamic tensors. 
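# [Editor's note, annotation rather than diff content: the comprehension added
# below derives the async list from BUFFERIZATION_PASS[1] instead of keeping a
# duplicated copy, rewriting only the one-shot-bufferize entry, i.e.
#   "one-shot-bufferize{... identity-layout-map}"
#   becomes "one-shot-bufferize{... identity-layout-map copy-before-write}",
# so the two pipelines can no longer drift apart as options are edited.]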
- # "cse", - "cp-global-memref", + s.replace("}", " copy-before-write}") if s.startswith("one-shot-bufferize") else s + for s in BUFFERIZATION_PASS[1] ], ) From c4b7b5e91c076d164ce069e009df6f5fb97bad2f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 11:50:31 -0400 Subject: [PATCH 134/183] Patch LLVM when building dialects in all the other wheel scripts --- .github/workflows/build-wheel-linux-x86_64.yaml | 1 + .github/workflows/build-wheel-macos-arm64.yaml | 4 ++++ .github/workflows/build-wheel-macos-x86_64.yaml | 4 ++++ .github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh | 5 +++++ 4 files changed, 14 insertions(+) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index df8b19586b..63baa19bfd 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -371,6 +371,7 @@ jobs: if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index d7dfa80c28..5bccc308b2 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -332,6 +332,10 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index 6dc80356a7..17da2a63fc 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -323,6 +323,10 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < 
mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_ENABLE_ASSERTIONS=ON \ diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh index aa7692215a..e6165d2f40 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh @@ -37,6 +37,11 @@ export PATH=/catalyst/llvm-build/bin:/opt/_internal/cpython-${PYTHON_VERSION}.${ # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja delocate 'amazon-braket-pennylane-plugin>1.27.1' +# Patch LLVM and MHLO +if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi +if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi +if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + # Build Catalyst runtime cmake -S runtime -B runtime-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ From c36e1602f9c31ae347bae39bd73e083bbddf8675 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 13:48:06 -0400 Subject: [PATCH 135/183] Create cache with patches' hash --- .github/workflows/check-catalyst.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/check-catalyst.yaml b/.github/workflows/check-catalyst.yaml index 073824d86d..c782f38029 100644 --- a/.github/workflows/check-catalyst.yaml +++ b/.github/workflows/check-catalyst.yaml @@ -128,7 +128,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} - name: Install Deps if: steps.cache-llvm-build.outputs.cache-hit != 'true' @@ -184,7 +184,7 @@ jobs: uses: actions/cache@v4 with: path: mhlo-build - key: ${{ runner.os }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} - name: Get Cached LLVM Source id: cache-llvm-source @@ -202,7 +202,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Install Deps @@ -272,7 +272,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} 
fail-on-cache-miss: true - name: Install Deps @@ -324,7 +324,7 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Get Cached MHLO Source @@ -341,7 +341,7 @@ jobs: uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ runner.os }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Get Cached Enzyme Source @@ -428,7 +428,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Download Quantum Build Artifact @@ -500,7 +500,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Download Quantum Build Artifact @@ -555,7 +555,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-default-build-${{ matrix.compiler }} + key: ${{ runner.os }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-build-${{ matrix.compiler }} fail-on-cache-miss: true - name: Download Quantum Build Artifact From 2e6324a6b1d9922b0040dbd52f8a4ac51f559296 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 14:44:28 -0400 Subject: [PATCH 136/183] Update patch and its name --- .../workflows/build-wheel-linux-x86_64.yaml | 8 +- .../workflows/build-wheel-macos-arm64.yaml | 4 +- .../workflows/build-wheel-macos-x86_64.yaml | 8 +- .../scripts/linux_arm64/rh8/build_catalyst.sh | 4 +- .../scripts/linux_arm64/rh8/build_llvm.sh | 2 +- .../scripts/linux_arm64/rh8/build_mhlo.sh | 2 +- mlir/Makefile | 8 +- ...> FunctionOpInterface-bufferization.patch} | 103 ++++++++++-------- ...o.patch => FunctionOpInterface-mhlo.patch} | 0 9 files changed, 76 insertions(+), 63 deletions(-) rename mlir/patches/{moduleOp-bufferization.patch => FunctionOpInterface-bufferization.patch} (93%) rename mlir/patches/{moduleOp-mhlo.patch => FunctionOpInterface-mhlo.patch} (100%) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index 63baa19bfd..b6266eedc8 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -173,7 +173,7 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < 
mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ @@ -212,7 +212,7 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ @@ -368,9 +368,9 @@ jobs: - name: Build MLIR Dialects run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index 5bccc308b2..6382f906d5 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -137,7 +137,7 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ @@ -332,7 +332,7 @@ jobs: # Build Quantum and 
Gradient Dialects - name: Build MLIR Dialects run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index 17da2a63fc..91978928f0 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -133,7 +133,7 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi cmake -S mlir/llvm-project/llvm -B llvm-build -G Ninja \ @@ -172,7 +172,7 @@ jobs: export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi - if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi cmake -S mlir/mlir-hlo -B mhlo-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ @@ -323,9 +323,9 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | - if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi - if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < 
mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi cmake -S mlir -B quantum-build -G Ninja \ -DCMAKE_BUILD_TYPE=Release \ diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh index e6165d2f40..bc35956fa0 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh @@ -38,9 +38,9 @@ export PATH=/catalyst/llvm-build/bin:/opt/_internal/cpython-${PYTHON_VERSION}.${ /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja delocate 'amazon-braket-pennylane-plugin>1.27.1' # Patch LLVM and MHLO -if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/moduleOp-bufferization.patch; fi +if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi -if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi +if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi # Build Catalyst runtime cmake -S runtime -B runtime-build -G Ninja \ diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh index 6323558df1..a17a928e04 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh @@ -33,7 +33,7 @@ export PATH=/opt/_internal/cpython-${PYTHON_VERSION}.${PYTHON_SUBVERSION}/bin:/o # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja -if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/moduleOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/moduleOp-bufferization.patch; fi +if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch; fi # Build LLVM diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh index a98ca20787..d4fc7a7f38 100644 --- 
a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh @@ -39,7 +39,7 @@ sed -i -e 's/LINK_LIBS PUBLIC/LINK_LIBS PUBLIC MLIRDeallocationUtils/g' mlir/mli export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi -if patch --dry-run -p1 -N --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/moduleOp-mhlo.patch; fi +if patch --dry-run -p1 -N --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch; fi # Build MHLO cmake -S /catalyst/mlir/mlir-hlo -B /catalyst/mhlo-build -G Ninja \ diff --git a/mlir/Makefile b/mlir/Makefile index 4d0e7de3d8..e3b4820a3c 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -13,10 +13,10 @@ RT_BUILD_DIR?=$(MK_DIR)/../runtime/build ENABLE_ASAN?=OFF BUILD_TYPE?=Release LLVM_ROOT=$(MK_DIR)/llvm-project -LLVM_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-bufferization.patch +LLVM_FUNCOP_PATCH_FILE=$(MK_DIR)/patches/FunctionOpInterface-bufferization.patch LLVM_FUNC_CALL_PATCH_FILE=$(MK_DIR)/patches/callOp-bufferization.patch MHLO_ROOT?=$(MK_DIR)/mlir-hlo -MHLO_MODULE_PATCH_FILE=$(MK_DIR)/patches/moduleOp-mhlo.patch +MHLO_MODULE_PATCH_FILE=$(MK_DIR)/patches/FunctionOpInterface-mhlo.patch TARGET_FILE=$(MK_DIR)/mlir-hlo/mhlo/transforms/CMakeLists.txt PATCH_FILE=$(MK_DIR)/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -59,8 +59,8 @@ all: llvm mhlo enzyme dialects .PHONY: llvm llvm: @echo "build LLVM and MLIR enabling Python bindings" - @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ - patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_MODULE_PATCH_FILE); \ + @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNCOP_PATCH_FILE) > /dev/null 2>&1; then \ + patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNCOP_PATCH_FILE); \ fi @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNC_CALL_PATCH_FILE); \ diff --git a/mlir/patches/moduleOp-bufferization.patch b/mlir/patches/FunctionOpInterface-bufferization.patch similarity index 93% rename from mlir/patches/moduleOp-bufferization.patch rename to mlir/patches/FunctionOpInterface-bufferization.patch index f66ca1b529..60a2e9b93f 100644 --- a/mlir/patches/moduleOp-bufferization.patch +++ b/mlir/patches/FunctionOpInterface-bufferization.patch @@ -1,21 +1,25 @@ diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h -index 2fda091e412a..ba28596d1f97 100644 +index 2fda091e412a..eb0df1d92d6a 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h -@@ -9,6 +9,7 @@ - #ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_ - #define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZABLEOPINTERFACE_H_ +@@ -11,6 +11,7 @@ -+#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" 
++#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" -@@ -262,7 +263,7 @@ struct BufferizationOptions { + #include "llvm/ADT/DenseMapInfoVariant.h" + #include "llvm/ADT/SetVector.h" +@@ -260,9 +261,9 @@ struct BufferizationOptions { + using AnalysisStateInitFn = std::function; + /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = - std::function; -+ FunctionOpInterface, const BufferizationOptions &)>; ++ using FunctionArgTypeConverterFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function(b.getTerminator())) { -+static Operation* getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { ++static Operation *getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { + Operation *returnOp = nullptr; + for (Block &b : funcOp.getFunctionBody()) { + auto candidateOp = b.getTerminator(); @@ -126,12 +130,13 @@ index 0a4072605c26..ce90d907b4ca 100644 if (returnOp) return nullptr; returnOp = candidateOp; -@@ -126,16 +127,15 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, +@@ -126,16 +127,16 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult -aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, -+aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, ++aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, ++ OneShotAnalysisState &state, FuncAnalysisState &funcState) { - if (funcOp.getBody().empty()) { + if (funcOp.getFunctionBody().empty()) { @@ -147,7 +152,7 @@ index 0a4072605c26..ce90d907b4ca 100644 if (!isa(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); -@@ -147,7 +147,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -147,7 +148,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, } // Support only single return-terminated block in the function. @@ -156,32 +161,35 @@ index 0a4072605c26..ce90d907b4ca 100644 assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) -@@ -168,7 +168,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -168,8 +169,8 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, return success(); } -static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, -+static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, bool isRead, - bool isWritten) { +- bool isWritten) { ++static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, ++ bool isRead, bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; -@@ -189,12 +189,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, + if (isRead && isWritten) { +@@ -189,12 +190,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. 
static LogicalResult -funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, -+funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, ++funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, ++ OneShotAnalysisState &state, FuncAnalysisState &funcState) { - for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; -+ for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; - ++idx) { +- ++idx) { ++ for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; ++idx) { // Skip non-tensor arguments. - if (!isa(funcOp.getFunctionType().getInput(idx))) + if (!isa(funcOp.getArgumentTypes()[idx])) continue; bool isRead; bool isWritten; -@@ -204,7 +204,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -204,7 +205,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; @@ -190,7 +198,7 @@ index 0a4072605c26..ce90d907b4ca 100644 // If the function has no body, conservatively assume that all args are // read + written. isRead = true; -@@ -230,20 +230,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +@@ -230,20 +231,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { @@ -214,7 +222,7 @@ index 0a4072605c26..ce90d907b4ca 100644 SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } -@@ -251,12 +250,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { +@@ -251,12 +251,13 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc. @@ -227,11 +235,12 @@ index 0a4072605c26..ce90d907b4ca 100644 - assert(calledFunction && "could not retrieved called func::FuncOp"); + funcOp->walk([&](CallOpInterface callOp) { + FunctionOpInterface calledFunction = getCalledFunction(callOp); -+ assert(calledFunction && "could not retrieved called FunctionOpInterface"); ++ if (!calledFunction) ++ return WalkResult::skip(); // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) -@@ -267,7 +266,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, +@@ -267,7 +268,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; @@ -240,20 +249,21 @@ index 0a4072605c26..ce90d907b4ca 100644 Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } -@@ -277,10 +276,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, +@@ -277,11 +278,9 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. 
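The hunk that follows generalizes `hasTensorSignature` from `func::FuncOp` to `FunctionOpInterface`, which is the crux of this patch: any function-like op, not just `func.func`, can now be ordered and analyzed by one-shot module bufferization. As a self-contained sketch of the resulting helper (assuming the elided predicate type is `mlir::TensorType`, as in the surrounding analysis; the hunk below is the authoritative change):

```
// Standalone sketch of the generalized signature check; illustrative only.
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "llvm/ADT/STLExtras.h"

/// Return "true" if the given function-like op has tensors in its signature.
static bool hasTensorSignature(mlir::FunctionOpInterface funcOp)
{
    // Accepting the interface instead of func::FuncOp lets custom
    // function-like ops (e.g. gradient.reverse) reuse the same analysis.
    return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred<mlir::TensorType>) ||
           llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred<mlir::TensorType>);
}
```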
-static bool hasTensorSignature(func::FuncOp funcOp) { - return llvm::any_of(funcOp.getFunctionType().getInputs(), -+static bool hasTensorSignature(FunctionOpInterface funcOp) { -+ return llvm::any_of(funcOp.getArgumentTypes(), - llvm::IsaPred) || +- llvm::IsaPred) || - llvm::any_of(funcOp.getFunctionType().getResults(), -+ llvm::any_of(funcOp.getResultTypes(), - llvm::IsaPred); +- llvm::IsaPred); ++static bool hasTensorSignature(FunctionOpInterface funcOp) { ++ return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred) || ++ llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred); } + /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by @@ -291,16 +290,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult @@ -277,7 +287,7 @@ index 0a4072605c26..ce90d907b4ca 100644 if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " -@@ -309,9 +308,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, +@@ -309,9 +308,10 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; @@ -286,11 +296,12 @@ index 0a4072605c26..ce90d907b4ca 100644 - assert(calledFunction && "could not retrieved called func::FuncOp"); + return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + FunctionOpInterface calledFunction = getCalledFunction(callOp); -+ assert(calledFunction && "could not retrieved called FunctionOpInterface"); ++ if (!calledFunction) ++ return WalkResult::skip(); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. if (!hasTensorSignature(calledFunction)) -@@ -349,11 +348,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, +@@ -349,11 +349,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. @@ -305,16 +316,18 @@ index 0a4072605c26..ce90d907b4ca 100644 SmallVector resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { -@@ -366,7 +365,7 @@ static void foldMemRefCasts(func::FuncOp funcOp) { +@@ -365,8 +365,8 @@ static void foldMemRefCasts(func::FuncOp funcOp) { + } } - auto newFuncType = FunctionType::get( +- auto newFuncType = FunctionType::get( - funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); -+ funcOp.getContext(), funcOp.getArgumentTypes(), resultTypes); ++ auto newFuncType = FunctionType::get(funcOp.getContext(), ++ funcOp.getArgumentTypes(), resultTypes); funcOp.setType(newFuncType); } -@@ -379,7 +378,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -379,7 +379,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. @@ -323,7 +336,7 @@ index 0a4072605c26..ce90d907b4ca 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -388,7 +387,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -388,7 +388,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. 
@@ -332,7 +345,7 @@ index 0a4072605c26..ce90d907b4ca 100644 if (!state.getOptions().isOpAllowed(funcOp)) continue; -@@ -416,7 +415,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, +@@ -416,7 +416,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { @@ -341,7 +354,7 @@ index 0a4072605c26..ce90d907b4ca 100644 for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); -@@ -430,7 +429,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -430,7 +430,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. @@ -350,7 +363,7 @@ index 0a4072605c26..ce90d907b4ca 100644 // A mapping of FuncOps to their callers. FuncCallerMap callerMap; -@@ -439,11 +438,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -439,11 +439,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. @@ -364,7 +377,7 @@ index 0a4072605c26..ce90d907b4ca 100644 // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; -@@ -463,7 +462,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( +@@ -463,7 +463,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. @@ -373,7 +386,7 @@ index 0a4072605c26..ce90d907b4ca 100644 continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); -@@ -490,12 +489,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( +@@ -490,12 +490,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well. 
OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { diff --git a/mlir/patches/moduleOp-mhlo.patch b/mlir/patches/FunctionOpInterface-mhlo.patch similarity index 100% rename from mlir/patches/moduleOp-mhlo.patch rename to mlir/patches/FunctionOpInterface-mhlo.patch From 975b28053aedec504027b942eef18a5907ea8e87 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 16:37:40 -0400 Subject: [PATCH 137/183] Reapply patch when building MLIR dialects --- .github/workflows/check-catalyst.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/check-catalyst.yaml b/.github/workflows/check-catalyst.yaml index c782f38029..65d033d342 100644 --- a/.github/workflows/check-catalyst.yaml +++ b/.github/workflows/check-catalyst.yaml @@ -372,6 +372,10 @@ jobs: - name: Build MLIR Dialects run: | + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi + if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi + CCACHE_DIR="$(pwd)/.ccache" \ C_COMPILER=$(which ${{ needs.constants.outputs[format('c_compiler.{0}', matrix.compiler)] }}) \ CXX_COMPILER=$(which ${{ needs.constants.outputs[format('cxx_compiler.{0}', matrix.compiler)] }}) \ From 3e68cd8bb27bbc786b67322b7b4ccc61836a612a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 17:50:57 -0400 Subject: [PATCH 138/183] Update changelog --- doc/releases/changelog-dev.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/releases/changelog-dev.md b/doc/releases/changelog-dev.md index 61235d045a..4c624d2e80 100644 --- a/doc/releases/changelog-dev.md +++ b/doc/releases/changelog-dev.md @@ -202,6 +202,8 @@ * Cached primitive lowerings is used instead of a custom cache structure. [(#1159)](https://github.com/PennyLaneAI/catalyst/pull/1159) +* Catalyst now supports `one-shot bufferize` from MLIR, which is required for JAX v0.4.29 or higher. [(#1027)](https://github.com/PennyLaneAI/catalyst/pull/1027) +
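For context on the changelog entry above: the frontend rewrites `one-shot-bufferize{...}` pass strings to append ` copy-before-write`, which corresponds roughly to the following programmatic setup. This is a minimal sketch against upstream MLIR's `OneShotBufferizationOptions` and `createOneShotBufferizePass` entry points; Catalyst itself assembles the pipeline from pass strings on the Python side.

```
// Minimal sketch: programmatic equivalent of the textual pass option
// "one-shot-bufferize{copy-before-write}". Illustrative only.
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

void addOneShotBufferize(mlir::PassManager &pm)
{
    mlir::bufferization::OneShotBufferizationOptions options;
    // copy-before-write skips the in-place analysis and inserts a buffer
    // copy before every write, trading copies for analysis complexity.
    options.copyBeforeWrite = true;
    pm.addPass(mlir::bufferization::createOneShotBufferizePass(options));
}
```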

Breaking changes

* Remove `static_size` field from `AbstractQreg` class. @@ -262,6 +264,7 @@ Spencer Comin, Lillian M.A. Frederiksen, Sengthai Heng, David Ittah, +Tzung-Han Juang, Mehrdad Malekmohammadi, Vincent Michaud-Rioux, Romain Moyard, From 661a1d536721851499da689db824825e751d6d1f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 30 Sep 2024 17:54:47 -0400 Subject: [PATCH 139/183] Add patch hash to macos-arm64 --- .github/workflows/build-wheel-macos-arm64.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index 6382f906d5..85fcaf7d2d 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -105,14 +105,14 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Restore MHLO Build id: cache-mhlo-build uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build lookup-only: True - name: Restore Enzyme Build @@ -166,7 +166,7 @@ jobs: uses: actions/cache/save@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Build MHLO Dialect if: steps.cache-mhlo-build.outputs.cache-hit != 'true' @@ -196,7 +196,7 @@ jobs: uses: actions/cache/save@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Build Enzyme if: steps.cache-enzyme-build.outputs.cache-hit != 'true' @@ -260,7 +260,7 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-3.10-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-3.10-wheel-build fail-on-cache-miss: True - name: Get Cached MHLO Source @@ -277,7 +277,7 @@ jobs: uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Get Cached Enzyme Source From 9d5d1179cfd1b55c4179b2a3ca4dab2b2bc4dbc6 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 1 Oct 2024 09:53:47 -0400 Subject: [PATCH 140/183] Add patch hash to linux-arm64 wheel --- .github/workflows/build-wheel-linux-arm64.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/.github/workflows/build-wheel-linux-arm64.yaml b/.github/workflows/build-wheel-linux-arm64.yaml index 304b23ee5d..9e6d70465a 100644 --- a/.github/workflows/build-wheel-linux-arm64.yaml +++ b/.github/workflows/build-wheel-linux-arm64.yaml @@ -63,7 +63,7 @@ jobs: uses: actions/cache@v4 with: path: mlir/llvm-project - key: llvm-${{ needs.constants.outputs.llvm_version }}-default-source + key: llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-source enableCrossOsArchive: True - name: Cache MHLO Source @@ -71,7 +71,7 @@ jobs: uses: actions/cache@v4 with: path: mlir/mlir-hlo - key: mhlo-${{ needs.constants.outputs.mhlo_version }}-default-source + key: mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-default-source enableCrossOsArchive: True - name: Cache Enzyme Source @@ -112,14 +112,14 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-wheel-build + key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Restore MHLO Build id: cache-mhlo-build uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build lookup-only: True - name: Restore Enzyme Build @@ -160,7 +160,7 @@ jobs: uses: actions/cache/save@v4 with: path: llvm-build - key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-wheel-build + key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Build MHLO Dialect if: steps.cache-mhlo-build.outputs.cache-hit != 'true' @@ -179,7 +179,7 @@ jobs: uses: actions/cache/save@v4 with: path: mhlo-build - key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Build Enzyme if: steps.cache-enzyme-build.outputs.cache-hit != 'true' @@ -240,7 +240,7 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-wheel-build + key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Get Cached MHLO Source @@ -257,7 +257,7 @@ jobs: uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_name }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Get Cached Enzyme Source @@ -334,7 +334,7 @@ jobs: uses: actions/cache@v4 with: path: llvm-build - key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-wheel-build + key: ${{ matrix.container_name }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Run Python Pytest Tests From 170d2117b341225b4eb271f8ec5b8e6a3cc3a2f2 Mon Sep 17 00:00:00 2001 From: Tzung-Han 
Juang Date: Tue, 1 Oct 2024 12:06:33 -0400 Subject: [PATCH 141/183] Make x86_64 use patch hash --- .github/workflows/build-wheel-linux-x86_64.yaml | 12 ++++++------ .github/workflows/build-wheel-macos-x86_64.yaml | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index b6266eedc8..b9eb13da2c 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -118,14 +118,14 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Restore MHLO Build id: cache-mhlo-build uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build lookup-only: True - name: Restore Enzyme Build @@ -202,7 +202,7 @@ jobs: uses: actions/cache/save@v4 with: path: llvm-build - key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Build MHLO Dialect if: steps.cache-mhlo-build.outputs.cache-hit != 'true' @@ -232,7 +232,7 @@ jobs: uses: actions/cache/save@v4 with: path: mhlo-build - key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Build Enzyme if: steps.cache-enzyme-build.outputs.cache-hit != 'true' @@ -300,7 +300,7 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-3.10-wheel-build + key: ${{ matrix.container_img }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-3.10-wheel-build fail-on-cache-miss: True - name: Get Cached MHLO Source @@ -317,7 +317,7 @@ jobs: uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ matrix.container_img }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Get Cached Enzyme Source diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index 91978928f0..57df711f01 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -103,14 +103,14 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Restore MHLO Build id: 
cache-mhlo-build uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build lookup-only: True - name: Restore Enzyme Build @@ -162,7 +162,7 @@ jobs: uses: actions/cache/save@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-${{matrix.python_version}}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-${{matrix.python_version}}-wheel-build - name: Build MHLO Dialect if: steps.cache-mhlo-build.outputs.cache-hit != 'true' @@ -192,7 +192,7 @@ jobs: uses: actions/cache/save@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build - name: Build Enzyme if: steps.cache-enzyme-build.outputs.cache-hit != 'true' @@ -250,7 +250,7 @@ jobs: uses: actions/cache/restore@v4 with: path: llvm-build - key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-3.10-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ needs.constants.outputs.llvm_version }}-patch-${{ hashFiles('mlir/patches/**') }}-3.10-wheel-build fail-on-cache-miss: True - name: Get Cached MHLO Source @@ -267,7 +267,7 @@ jobs: uses: actions/cache/restore@v4 with: path: mhlo-build - key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-wheel-build + key: ${{ runner.os }}-${{ runner.arch }}-mhlo-${{ needs.constants.outputs.mhlo_version }}-patch-${{ hashFiles('mlir/patches/**') }}-wheel-build fail-on-cache-miss: True - name: Get Cached Enzyme Source From 386f27472a3a9dffdbb084796faed36d7db07186 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 3 Oct 2024 10:36:54 -0400 Subject: [PATCH 142/183] Install ncurses for linux-arm64 wheels --- .github/workflows/scripts/linux_arm64/rh8/test_wheels.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh b/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh index 626d3d93ce..817ea3a8a1 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh @@ -11,7 +11,7 @@ export PYTHON_PACKAGE=$4 # Install system dependencies (gcc gives access to c99, which is needed by some tests) dnf update -y -dnf install -y libzstd-devel gcc-toolset-${GCC_VERSION} gcc +dnf install -y libzstd-devel gcc-toolset-${GCC_VERSION} gcc ncurses-devel if [ "$PYTHON_MAJOR_MINOR" != "3.10" ]; then dnf install -y ${PYTHON_PACKAGE} ${PYTHON_PACKAGE}-devel else From 17c94d60eeb7c20d4afdea7fb540a9fbe787dc7a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 4 Oct 2024 15:59:04 -0400 Subject: [PATCH 143/183] Add top-level comments for bufferization --- .../Transforms/BufferizableOpInterfaceImpl.cpp | 18 ++++++++++++++++++ .../Transforms/BufferizableOpInterfaceImpl.cpp | 18 ++++++++++++++++++ .../Transforms/BufferizableOpInterfaceImpl.cpp | 14 ++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp 
b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index c22ef85d03..7eac6bbaa6 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -14,6 +14,24 @@ using namespace mlir; using namespace catalyst; namespace { +/** + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * and `getAliasingValues`. + * + * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read. + * + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * (if bufferizing in-place). + * + * `getAliasingValues`: Return the OpResults that may share the same buffer as the given OpOperand. + * Note that the MLIR documentation mentions `getAliasingOpOperands` rather than `getAliasingValues`, but the latter appears to serve the same purpose. + * + * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires + * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the implementation of + * `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and `getAliasingOpOperands`. + * + * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize + */ /// Bufferization of catalyst.print. Get memref of printOp.val. struct PrintOpInterface diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 3b620d75a4..242f61043b 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -24,6 +24,24 @@ using namespace mlir; using namespace catalyst::gradient; namespace { +/** + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * and `getAliasingValues`. + * + * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read. + * + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * (if bufferizing in-place). + * + * `getAliasingValues`: Return the OpResults that may share the same buffer as the given OpOperand. + * Note that the MLIR documentation mentions `getAliasingOpOperands` rather than `getAliasingValues`, but the latter appears to serve the same purpose. + * + * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires + * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the implementation of + * `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and `getAliasingOpOperands`. + * + * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize + */ static BaseMemRefType getBufferizedFunctionArgType(FunctionOpInterface funcOp, int64_t index, diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index fb3c0c64c7..d58dffe389 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -10,6 +10,20 @@ using namespace mlir; using namespace catalyst::quantum; namespace { +/** + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * and `getAliasingValues`. + * + * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read.
+ * + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * (if bufferizing in-place). + * + * `getAliasingValues`: Return the OpResults that may share the same buffer as the given OpOperand. + * Note that the MLIR documentation mentions `getAliasingOpOperands` rather than `getAliasingValues`, but the latter appears to serve the same purpose. + * + * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize + */ /// Bufferization of catalyst.quantum.unitary. Convert Matrix into memref. struct QubitUnitaryOpInterface From d58a215fb947a6b67e1e9eef0d7ee7871fb5ad4f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 4 Oct 2024 16:19:27 -0400 Subject: [PATCH 144/183] Add comment that explains RestoreReverseOp --- .../Transforms/PostprocessingPatterns.cpp | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index 0f1b8332ba..d8303e77cf 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -206,12 +206,35 @@ struct PostprocessReverseOp : public OpRewritePattern { }; struct RestoreReverseOp : public OpRewritePattern { + /* One-shot bufferize optimizes away the return values that are not used. + * This pattern aims to revert the changes made by One-shot bufferize. + * + * For example, One-shot bufferize turns + * ``` + * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref attributes {argc = 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : i64} { + * %0 = func.call @bwd(%arg0) : (memref) -> memref + * %alloc = memref.alloc() {alignment = 64 : i64} : memref + * memref.copy %0, %alloc : memref to memref + * gradient.return {empty = true} %alloc : memref + * } + * ``` + * into + * ``` + * gradient.reverse @bwd.rev(%arg0: memref) -> memref attributes {argc = 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : i64} { + * %0 = func.call @bwd(%arg0) : (memref) -> memref + * %alloc = memref.alloc() {alignment = 64 : i64} : memref + * memref.copy %0, %alloc : memref to memref + * gradient.return {empty = true} %alloc : memref + * } + * ``` + * + * However, Enzyme expects to see the removed return. We have to add it back. + */ using OpRewritePattern::OpRewritePattern; mlir::LogicalResult matchAndRewrite(ReverseOp op, mlir::PatternRewriter &rewriter) const override { - // ReverseOp's output is optimized away by one-shot bufferize.
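The interface contract documented in PATCH 143 can be made concrete with a small external model. Below is a hedged sketch for a hypothetical op `MyPrintOp` (one tensor operand, no results); the hook names follow upstream MLIR, while `MyPrintOp` and its `getVal()` accessor are assumptions, not the actual Catalyst implementation.

```
// Hedged sketch of a BufferizableOpInterface external model for a
// hypothetical read-only op `MyPrintOp`; it illustrates the required hooks,
// not the real models in the BufferizableOpInterfaceImpl.cpp files.
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"

using namespace mlir;
using namespace mlir::bufferization;

struct MyPrintOpInterface
    : public BufferizableOpInterface::ExternalModel<MyPrintOpInterface, MyPrintOp> {
    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                                const AnalysisState &state) const
    {
        return true; // the printed tensor's buffer is read
    }

    bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                                 const AnalysisState &state) const
    {
        return false; // printing never writes to the buffer
    }

    AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
                                        const AnalysisState &state) const
    {
        return {}; // no results, so nothing can alias the operand
    }

    LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                            const BufferizationOptions &options) const
    {
        auto printOp = cast<MyPrintOp>(op);
        FailureOr<Value> buffer = getBuffer(rewriter, printOp.getVal(), options);
        if (failed(buffer))
            return failure();
        replaceOpWithNewBufferizedOp<MyPrintOp>(rewriter, op, *buffer);
        return success();
    }
};

// Registration, mirroring registerBufferizableOpInterfaceExternalModels:
//   MyPrintOp::attachInterface<MyPrintOpInterface>(*ctx);
```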
auto forwardArgc = op.getArgc(); auto forwardResc = op.getResc(); auto tape = op.getTape(); From b438e4d9c014e78b09384ceb3863675eafa76368 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 09:57:35 -0400 Subject: [PATCH 145/183] Update mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp Co-authored-by: David Ittah --- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 7eac6bbaa6..432b135282 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -299,4 +299,4 @@ void catalyst::registerBufferizableOpInterfaceExternalModels(DialectRegistry &re CallbackOp::attachInterface(*ctx); CallbackCallOp::attachInterface(*ctx); }); -} \ No newline at end of file +} From cf4018c1bbdcb40bb21d8234c611a63f6951914a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 10:09:52 -0400 Subject: [PATCH 146/183] Update mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h Co-authored-by: David Ittah --- mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h index c8426be8b3..ae5096eb39 100644 --- a/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/Gradient/Transforms/BufferizableOpInterfaceImpl.h @@ -10,4 +10,4 @@ void registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry ®ist } -} // namespace catalyst \ No newline at end of file +} // namespace catalyst From 71abe78fe2e5a73186aa51a19118af144eeee6ee Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 10:10:28 -0400 Subject: [PATCH 147/183] Update mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h Co-authored-by: David Ittah --- mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h index e56c914ba4..ec20d6f6c9 100644 --- a/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/Catalyst/Transforms/BufferizableOpInterfaceImpl.h @@ -6,4 +6,4 @@ namespace catalyst { void registerBufferizableOpInterfaceExternalModels(mlir::DialectRegistry ®istry); -} // namespace catalyst \ No newline at end of file +} // namespace catalyst From c1384522567c7c59758d595e637cda78c416b2d3 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 10:33:29 -0400 Subject: [PATCH 148/183] Use compare instead of == --- mlir/lib/Catalyst/Transforms/AsyncUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp index a295156330..e811668e89 100644 --- a/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp +++ b/mlir/lib/Catalyst/Transforms/AsyncUtils.cpp @@ -215,7 +215,7 @@ std::optional AsyncUtils::getCalleeSafe(LLVM::CallOp callOp) bool AsyncUtils::isFunctionNamed(LLVM::LLVMFuncOp funcOp, llvm::StringRef expectedName) { llvm::StringRef observedName = funcOp.getSymName(); - return observedName == expectedName; + return 
observedName.compare(expectedName) == 0; } bool AsyncUtils::isMlirAsyncRuntimeCreateValue(LLVM::LLVMFuncOp funcOp) From e8322ac387512e2fa5c12d268f68d5dbdec89424 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 10:52:01 -0400 Subject: [PATCH 149/183] Add TODO reminder for removing patches after updating jax --- .github/workflows/build-wheel-linux-x86_64.yaml | 3 +++ .github/workflows/build-wheel-macos-arm64.yaml | 3 +++ .github/workflows/build-wheel-macos-x86_64.yaml | 3 +++ .github/workflows/check-catalyst.yaml | 1 + .github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh | 2 +- .github/workflows/scripts/linux_arm64/rh8/build_llvm.sh | 1 + .github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh | 1 + mlir/Makefile | 2 ++ 8 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-wheel-linux-x86_64.yaml b/.github/workflows/build-wheel-linux-x86_64.yaml index b9eb13da2c..b5685d6c1d 100644 --- a/.github/workflows/build-wheel-linux-x86_64.yaml +++ b/.github/workflows/build-wheel-linux-x86_64.yaml @@ -173,6 +173,7 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH + # TODO: Remove these patches after upgrading Jax (potentailly for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi @@ -208,6 +209,7 @@ jobs: if: steps.cache-mhlo-build.outputs.cache-hit != 'true' # building with LLD is a strong requirement for mhlo run: | + # TODO: Remove these patches after upgrading Jax (potentailly for 0.4.34 or higher). export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch @@ -367,6 +369,7 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | + # TODO: Remove these patches after upgrading Jax (potentailly for 0.4.34 or higher). export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi diff --git a/.github/workflows/build-wheel-macos-arm64.yaml b/.github/workflows/build-wheel-macos-arm64.yaml index 85fcaf7d2d..0e7b967da9 100644 --- a/.github/workflows/build-wheel-macos-arm64.yaml +++ b/.github/workflows/build-wheel-macos-arm64.yaml @@ -137,6 +137,7 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + # TODO: Remove these patches after upgrading Jax (potentailly for 0.4.34 or higher). 
if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi @@ -173,6 +174,7 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH

+ # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi @@ -332,6 +334,7 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | + # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/moduleOp-mhlo.patch; fi diff --git a/.github/workflows/build-wheel-macos-x86_64.yaml b/.github/workflows/build-wheel-macos-x86_64.yaml index 57df711f01..3d87d07b0b 100644 --- a/.github/workflows/build-wheel-macos-x86_64.yaml +++ b/.github/workflows/build-wheel-macos-x86_64.yaml @@ -133,6 +133,7 @@ jobs: - name: Build LLVM / MLIR if: steps.cache-llvm-build.outputs.cache-hit != 'true' run: | + # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi @@ -169,6 +170,7 @@ jobs: run: | export PATH=$GITHUB_WORKSPACE/llvm-build/bin:$PATH

+ # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi @@ -323,6 +325,7 @@ jobs: # Build Quantum and Gradient Dialects - name: Build MLIR Dialects run: | + # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher).
if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi diff --git a/.github/workflows/check-catalyst.yaml b/.github/workflows/check-catalyst.yaml index 65d033d342..1b40e723c8 100644 --- a/.github/workflows/check-catalyst.yaml +++ b/.github/workflows/check-catalyst.yaml @@ -372,6 +372,7 @@ jobs:

     - name: Build MLIR Dialects run: | + # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh index bc35956fa0..74687265f9 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_catalyst.sh @@ -37,7 +37,7 @@ export PATH=/catalyst/llvm-build/bin:/opt/_internal/cpython-${PYTHON_VERSION}.${ # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja delocate 'amazon-braket-pennylane-plugin>1.27.1'

-# Patch LLVM and MHLO +# Patch LLVM and MHLO. TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher).
if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/llvm-project < mlir/patches/callOp-bufferization.patch; fi if patch --dry-run -p1 -N --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=mlir/mlir-hlo < mlir/patches/FunctionOpInterface-mhlo.patch; fi diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh index a17a928e04..b4ee206580 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_llvm.sh @@ -33,6 +33,7 @@ export PATH=/opt/_internal/cpython-${PYTHON_VERSION}.${PYTHON_SUBVERSION}/bin:/o # Install python dependencies /usr/bin/python3 -m pip install pennylane pybind11 PyYAML cmake ninja

+# TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/FunctionOpInterface-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/FunctionOpInterface-bufferization.patch; fi if patch --dry-run -p1 -N --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/llvm-project < /catalyst/mlir/patches/callOp-bufferization.patch; fi diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh index d4fc7a7f38..493af40244 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh @@ -39,6 +39,7 @@ sed -i -e 's/LINK_LIBS PUBLIC/LINK_LIBS PUBLIC MLIRDeallocationUtils/g' mlir/mli export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi +# TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch; fi # Build MHLO diff --git a/mlir/Makefile b/mlir/Makefile index e3b4820a3c..14d48a1109 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -58,6 +58,7 @@ all: llvm mhlo enzyme dialects .PHONY: llvm llvm: + # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). @echo "build LLVM and MLIR enabling Python bindings" @if patch --dry-run -p1 -N --directory=$(LLVM_ROOT) < $(LLVM_FUNCOP_PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 --directory=$(LLVM_ROOT) < $(LLVM_FUNCOP_PATCH_FILE); \ fi @@ -96,6 +97,7 @@ mhlo: @if patch --dry-run -p1 -N $(TARGET_FILE) $(PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 $(TARGET_FILE) $(PATCH_FILE); \ fi + # TODO: Remove this patch after upgrading Jax (potentially for 0.4.34 or higher).
@if patch --dry-run -p1 -N --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE) > /dev/null 2>&1; then \ patch -p1 --directory=$(MHLO_ROOT) < $(MHLO_MODULE_PATCH_FILE); \ fi

From 761aaf7281fd5ba3a84cd73845aff2094938658d Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 10:53:17 -0400 Subject: [PATCH 150/183] Update mlir/Makefile

Co-authored-by: David Ittah --- mlir/Makefile | 1 + 1 file changed, 1 insertion(+)

diff --git a/mlir/Makefile b/mlir/Makefile index 14d48a1109..d713adc969 100644 --- a/mlir/Makefile +++ b/mlir/Makefile @@ -12,6 +12,7 @@ ENZYME_BUILD_DIR?=$(MK_DIR)/Enzyme/build RT_BUILD_DIR?=$(MK_DIR)/../runtime/build ENABLE_ASAN?=OFF BUILD_TYPE?=Release +# TODO: remove after JAX upgrade LLVM_ROOT=$(MK_DIR)/llvm-project LLVM_FUNCOP_PATCH_FILE=$(MK_DIR)/patches/FunctionOpInterface-bufferization.patch LLVM_FUNC_CALL_PATCH_FILE=$(MK_DIR)/patches/callOp-bufferization.patch

From 26c56d508fb5f6743f1ee156b43fecc95743a72a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 11:00:18 -0400 Subject: [PATCH 151/183] Add another TODO comment

--- .github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh | 1 + 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh index 493af40244..e452ee22d4 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/build_mhlo.sh @@ -38,6 +38,7 @@ sed -i -e 's/LINK_LIBS PUBLIC/LINK_LIBS PUBLIC MLIRDeallocationUtils/g' mlir/mli export TARGET_FILE=mlir/mlir-hlo/mhlo/transforms/CMakeLists.txt export PATCH_FILE=mlir/patches/mhlo-Add-PassesIncGen-in-transforms-CMakeList.patch +# TODO: Jax has merged this fix. Remove after JAX upgrade. if patch --dry-run -p1 -N $TARGET_FILE $PATCH_FILE > /dev/null 2>&1; then patch -p1 $TARGET_FILE $PATCH_FILE; fi # TODO: Remove these patches after upgrading Jax (potentially for 0.4.34 or higher). if patch --dry-run -p1 -N --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch > /dev/null 2>&1; then patch -p1 --directory=/catalyst/mlir/mlir-hlo < /catalyst/mlir/patches/FunctionOpInterface-mhlo.patch; fi # Build MHLO

From 0296eb9bc2bbc140f1f09e4cf35dfb41332b1ba2 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 11:19:32 -0400 Subject: [PATCH 152/183] Cleanup

--- .../BufferizableOpInterfaceImpl.cpp | 32 +++++++------------ .../BufferizableOpInterfaceImpl.cpp | 18 ++++++----- .../Transforms/PostprocessingPatterns.cpp | 26 +++++++-------- .../BufferizableOpInterfaceImpl.cpp | 11 ++++--- 4 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 432b135282..9f76b737e9 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -15,20 +15,22 @@ using namespace catalyst; namespace { /** - * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, * and `getAliasingValues`. - * + * * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read.
* - * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written * (if bufferizing in-place). * - * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given OpOperand. - * Note that MLIR documentation does not mention `getAliasingValues` but it seems to serve the same purpose. + * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given + * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to + * serve the same purpose. * - * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires - * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the implementation of - * `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and `getAliasingOpOperands`. + * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires + * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the + * implementation of `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and + * `getAliasingOpOperands`. * * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */ @@ -105,19 +107,7 @@ struct CustomCallOpInterface FailureOr opBuffer = getBuffer(rewriter, operand, options); if (failed(opBuffer)) return failure(); - MemRefType memrefType = dyn_cast(opBuffer->getType()); - if (!memrefType) - return failure(); - if (!memrefType.getLayout().isIdentity()) { - auto nonStrideType = - MemRefType::get(memrefType.getShape(), memrefType.getElementType()); - auto newMemRef = - rewriter.create(op->getLoc(), nonStrideType, *opBuffer); - bufferArgs.push_back(newMemRef); - } - else { - bufferArgs.push_back(*opBuffer); - } + bufferArgs.push_back(*opBuffer); } // Add bufferized return values to the arguments diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 242f61043b..ce55763343 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -25,20 +25,22 @@ using namespace catalyst::gradient; namespace { /** - * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, * and `getAliasingValues`. - * + * * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read. * - * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written * (if bufferizing in-place). * - * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given OpOperand. - * Note that MLIR documentation does not mention `getAliasingValues` but it seems to serve the same purpose. + * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given + * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to + * serve the same purpose. * - * Bufferizing FunctionOpInterface is also not documented by MLIR. 
It requires - * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the implementation of - * `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and `getAliasingOpOperands`. + * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the + * implementation of `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and + * `getAliasingOpOperands`. * * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */ diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index d8303e77cf..cfef5c9aae 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -206,28 +206,28 @@ struct PostprocessReverseOp : public OpRewritePattern { }; struct RestoreReverseOp : public OpRewritePattern { - /* One-shot bufferize optimizes away the return values that are not used. + /* One-shot bufferize optimizes away the return values that are not used. * This pass aims to revert the changes made by One-shot bufferize. - * + * * For example, * ``` - * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref attributes {argc = 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : i64} { - * %0 = func.call @bwd(%arg0) : (memref) -> memref - * %alloc = memref.alloc() {alignment = 64 : i64} : memref - * memref.copy %0, %alloc : memref to memref - * gradient.return {empty = true} %alloc : memref + * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref attributes {argc = + * 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape + * = 0 : i64} { %0 = func.call @bwd(%arg0) : (memref) -> memref %alloc = + * memref.alloc() {alignment = 64 : i64} : memref memref.copy %0, %alloc : memref to + * memref gradient.return {empty = true} %alloc : memref * } * ``` - * will be turned into + * will be turned into * * ``` - * gradient.reverse @bwd.rev(%arg0: memref) -> memref attributes {argc = 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : i64} { - * %0 = func.call @bwd(%arg0) : (memref) -> memref - * %alloc = memref.alloc() {alignment = 64 : i64} : memref - * memref.copy %0, %alloc : memref to memref + * gradient.reverse @bwd.rev(%arg0: memref) -> memref attributes {argc = 2 : i64, + * implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : + * i64} { %0 = func.call @bwd(%arg0) : (memref) -> memref %alloc = memref.alloc() + * {alignment = 64 : i64} : memref memref.copy %0, %alloc : memref to memref * gradient.return {empty = true} %alloc : memref * } * ``` - * + * * However, Enzyme expects to see the removed return. We have to add it back. */ using OpRewritePattern::OpRewritePattern; diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index d58dffe389..fe11fcc64a 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -11,16 +11,17 @@ using namespace catalyst::quantum; namespace { /** - * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, + * The new bufferization interface requires `bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, * and `getAliasingValues`.
- * + * `bufferizesToMemoryRead`: Return `true` if the buffer of the given tensor OpOperand is read. * - * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written + * `bufferizesToMemoryWrite`: Return `true` if the buffer of the given tensor OpOperand is written * (if bufferizing in-place). * - * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given OpOperand. - * Note that MLIR documentation does not mention `getAliasingValues` but it seems to serve the same purpose. + * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given + * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to + * serve the same purpose. * * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */

From c27fd57c4a6be36dcaaeed6568759c168d8c9e04 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 11:59:07 -0400 Subject: [PATCH 153/183] Set bufferizesToMemoryWrite for CustomCallOp as true

--- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 9f76b737e9..5316b0191f 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -72,7 +72,7 @@ struct PrintOpInterface } };

-/// Bufferization of catalyst.print. Mainly get buffers for arguments. +/// Bufferization of catalyst.custom_call. Mainly get buffers for arguments. struct CustomCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel<CustomCallOpInterface, CustomCallOp> { @@ -85,7 +85,7 @@ struct CustomCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; }

From b2879cf1443afe3664762deaa4520f1e83566a22 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 13:39:19 -0400 Subject: [PATCH 154/183] Make CustomCallOp not return failure if its operands are not bufferizable

--- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 5316b0191f..1e4dc62ef4 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -106,8 +106,9 @@ struct CustomCallOpInterface for (Value operand : operands) { FailureOr<Value> opBuffer = getBuffer(rewriter, operand, options); if (failed(opBuffer)) - return failure(); - bufferArgs.push_back(*opBuffer); + bufferArgs.push_back(operand); + else + bufferArgs.push_back(*opBuffer); }

         // Add bufferized return values to the arguments

From b69dd2a3852ca013f8cef7d846266d4475e90198 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 14:25:16 -0400 Subject: [PATCH 155/183] Make CustomCallOp not return failure if its results are not bufferizable

--- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 1e4dc62ef4..35adc06e94 100644 --- 
a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -117,7 +117,8 @@ struct CustomCallOpInterface Type resultType = result.getType(); RankedTensorType tensorType = dyn_cast<RankedTensorType>(resultType); if (!tensorType) { - return failure(); + bufferArgs.push_back(result); + continue; } auto options = bufferization::BufferizationOptions(); FailureOr<Value> tensorAlloc = bufferization::allocateTensorForShapedValue(

From 9069ca8c5610a5fdcf53228edc580b8fd6735093 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 14:36:51 -0400 Subject: [PATCH 156/183] Remove unused CallbackOp members

--- .../BufferizableOpInterfaceImpl.cpp | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 35adc06e94..dc4ed0e5e5 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -164,22 +164,9 @@ struct CallbackOpInterface return false; }

-    bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return true; - } - - bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { - return false; - } - - bufferization::AliasingValueList - getAliasingValues(Operation *op, OpOperand &opOperand, - const bufferization::AnalysisState &state) const - { + bufferization::AliasingOpOperandList + getAliasingOpOperands(Operation *op, Value value, + const bufferization::AnalysisState &state) const { return {}; }

From 9c93c2a6c1d3f1e3dd2cb1cf0437c2bdafa0cf2b Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 15:23:16 -0400 Subject: [PATCH 157/183] Implement bufferizesToAllocation for CallbackCallOp and CustomCallOp

--- .../Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index dc4ed0e5e5..4c44c318f8 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -76,6 +76,8 @@ struct PrintOpInterface struct CustomCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel<CustomCallOpInterface, CustomCallOp> { + bool bufferizesToAllocation(Operation *op, Value value) const { return true; } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { @@ -166,7 +168,8 @@ struct CallbackOpInterface bufferization::AliasingOpOperandList getAliasingOpOperands(Operation *op, Value value, - const bufferization::AnalysisState &state) const { + const bufferization::AnalysisState &state) const + { return {}; }

@@ -199,6 +202,8 @@ struct CallbackOpInterface struct CallbackCallOpInterface : public bufferization::BufferizableOpInterface::ExternalModel<CallbackCallOpInterface, CallbackCallOp> { + bool bufferizesToAllocation(Operation *op, Value value) const { return true; } + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const {

From 323b09372db8dd7f4741166335fee975cc18396a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Mon, 7 Oct 2024 17:50:58 -0400 Subject: [PATCH 158/183] Tentatively revert CustomCallOp bufferizesToMemoryWrite to false

--- 
mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 4c44c318f8..4aadb29bfd 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -87,7 +87,7 @@ struct CustomCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return true; + return false; } bufferization::AliasingValueList From 009929bc54df7121acc793ba0e1dd6b9d7f9960c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 8 Oct 2024 12:07:20 -0400 Subject: [PATCH 159/183] Test disable lcurses --- frontend/catalyst/debug/compiler_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/catalyst/debug/compiler_functions.py b/frontend/catalyst/debug/compiler_functions.py index 09175346e8..30e30fae4b 100644 --- a/frontend/catalyst/debug/compiler_functions.py +++ b/frontend/catalyst/debug/compiler_functions.py @@ -271,7 +271,7 @@ def f(x): f"-Wl,-rpath,{python_lib_dir_path}", f"-L{python_lib_dir_path}", "-lpython" + version_str, - "-lcurses", + #"-lcurses", ] # Linker in macOS might use @rpath/Python3.framework/Versions/3.x/Python3. From e3274797d24fcede73c4e49dbf916c338d08fd0f Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 8 Oct 2024 13:22:23 -0400 Subject: [PATCH 160/183] Remove lcurses installation --- .github/workflows/scripts/linux_arm64/rh8/test_wheels.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh b/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh index 817ea3a8a1..626d3d93ce 100644 --- a/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh +++ b/.github/workflows/scripts/linux_arm64/rh8/test_wheels.sh @@ -11,7 +11,7 @@ export PYTHON_PACKAGE=$4 # Install system dependencies (gcc gives access to c99, which is needed by some tests) dnf update -y -dnf install -y libzstd-devel gcc-toolset-${GCC_VERSION} gcc ncurses-devel +dnf install -y libzstd-devel gcc-toolset-${GCC_VERSION} gcc if [ "$PYTHON_MAJOR_MINOR" != "3.10" ]; then dnf install -y ${PYTHON_PACKAGE} ${PYTHON_PACKAGE}-devel else From aa9f69cda8c2a88a70a719473a92afdb2055955c Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 8 Oct 2024 13:27:05 -0400 Subject: [PATCH 161/183] Cleanup --- frontend/catalyst/debug/compiler_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/frontend/catalyst/debug/compiler_functions.py b/frontend/catalyst/debug/compiler_functions.py index 30e30fae4b..aa8cdb9564 100644 --- a/frontend/catalyst/debug/compiler_functions.py +++ b/frontend/catalyst/debug/compiler_functions.py @@ -271,7 +271,6 @@ def f(x): f"-Wl,-rpath,{python_lib_dir_path}", f"-L{python_lib_dir_path}", "-lpython" + version_str, - #"-lcurses", ] # Linker in macOS might use @rpath/Python3.framework/Versions/3.x/Python3. 
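For reference, the hooks that the surrounding patches keep adjusting (`bufferizesToMemoryRead`, `bufferizesToMemoryWrite`, `getAliasingValues`, `bufferizesToAllocation`, `bufferize`) all come from MLIR's `BufferizableOpInterface`. The sketch below shows a minimal external model for a hypothetical op `FooOp` with one tensor operand and one freshly allocated tensor result. This is illustrative only: `FooOp`, its copy-like lowering, and the statically shaped identity-layout buffer are assumptions made for the example, not part of Catalyst.

    #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
    #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
    #include "mlir/Dialect/MemRef/IR/MemRef.h"

    using namespace mlir;

    // Sketch only: `FooOp` is a placeholder op that reads one tensor operand
    // and produces one new tensor result.
    struct FooOpInterface
        : public bufferization::BufferizableOpInterface::ExternalModel<FooOpInterface, FooOp> {
        // The lowered op reads the operand's buffer.
        bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                                    const bufferization::AnalysisState &state) const
        {
            return true;
        }

        // The operand's buffer is never written in place; the result gets its
        // own allocation instead (see bufferizesToAllocation below).
        bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                                     const bufferization::AnalysisState &state) const
        {
            return false;
        }

        // No result aliases an operand buffer, so the alias set is empty.
        bufferization::AliasingValueList
        getAliasingValues(Operation *op, OpOperand &opOperand,
                          const bufferization::AnalysisState &state) const
        {
            return {};
        }

        // The result is materialized into a newly allocated buffer.
        bool bufferizesToAllocation(Operation *op, Value value) const { return true; }

        LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                                const bufferization::BufferizationOptions &options) const
        {
            // Fetch the buffer behind the single tensor operand.
            FailureOr<Value> srcBuffer =
                bufferization::getBuffer(rewriter, op->getOperand(0), options);
            if (failed(srcBuffer))
                return failure();
            // Assumed copy-like lowering on a statically shaped, identity-layout
            // memref: allocate a destination buffer and copy the operand into it.
            auto memrefType = cast<MemRefType>(srcBuffer->getType());
            Value alloc = rewriter.create<memref::AllocOp>(op->getLoc(), memrefType);
            rewriter.create<memref::CopyOp>(op->getLoc(), *srcBuffer, alloc);
            // Replace the tensor result with the bufferized value.
            bufferization::replaceOpWithBufferizedValues(rewriter, op, alloc);
            return success();
        }
    };

Such a model would be attached the same way the existing ones are, e.g. `FooOp::attachInterface<FooOpInterface>(*ctx)` inside a dialect-extension callback on the `DialectRegistry`.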
From 87be1d34cabe415b6cc88e714cc5a383b5d9ba0a Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Tue, 8 Oct 2024 15:16:52 -0400 Subject: [PATCH 162/183] Add comment to CallbackOp's bufferizesToMemoryWrite --- mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 4aadb29bfd..7a2e81decc 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -213,6 +213,9 @@ struct CallbackCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { + // The method returns `true` if the given OpOperand bufferizes to a memory write. + // Since CallbackCallOp is related to print, which does not write to its operand, + // Maybe we can set this to `false`. return false; } From 143ba55e14277f82ed3918370cb76160a509e4a0 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 9 Oct 2024 11:48:25 -0400 Subject: [PATCH 163/183] Add table of bufferizable ops --- mlir/lib/Bufferization.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 mlir/lib/Bufferization.md diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md new file mode 100644 index 0000000000..b8addeccb5 --- /dev/null +++ b/mlir/lib/Bufferization.md @@ -0,0 +1,23 @@ +**Bufferization Interfaces:** + +| Bufferizable Operations | PrintOp | CustomCallOp | CallbackOp | CallbackCallOp | AdjointOp | BackpropOp | ForwardOp | ReverseOp | QubitUnitaryOp | HermitianOp | HamiltonianOp | SampleOp | StateOp | ProbsOp | CountsOp | SetStateOp | SetBasisStateOp | +| --------------------------------| ---------| ------------ | ------------ | -------------- | --------- | ---------- | --------- | --------- | -------------- | ----------- | ------------- | -------- | ------- | ------- | -------- | ---------- | --------------- | +| Catagory | | catalyst | catalyst | catalyst | catalyst | gradient | gradient | gradient | gradient | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | +| bufferizesToAllocation | | true | | true | | | | | | | | | | | | | | +| bufferizesToMemoryRead | true | true | | false | true | true | | | true | true | true | false | false | false | false | false | false | +| bufferizesToMemoryWrite | false | false | | false | false | true | | | false | false | false | false | false | false | false | false | false | +| bufferizesToElementwiseAccess | | | | | | | | | | | | | | | | | | +| resultBufferizesToMemoryWrite | | | | | | | | | | | | | | | | | | +| mustBufferizeInPlace | | | | | | | | | | | | | | | | | | +| getAliasingValues | {} | {} | | {} | {} | {} | | | {} | {} | {} | {} | {} | {} | {} | {} | {} | +| getAliasingOpOperands | | | {} | | | | v | v | | | | | | | | | | +| resolveConflicts | | | | | | | | | | | | | | | | | | +| bufferize | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | +| isWritable | | | | | | | | | | | | | | | | | | +| isNotConflicting | | | | | | | | | | | | | | | | | | +| verifyAnalysis | | | | | | | v | v | | | | | | | | | | +| getBufferType | | | | | | | v | v | | | | | | | | | | +| isRepetitiveRegion | | | | | | | | | | | | | | | | | | +| isParallelRegion | | | | | | | | | | | | | | | | | | +| hasTensorSemantics | | | v | | | | v | v | | | | | | | | | | +| supportsUnstructuredControlFlow | | 
| true | | | | true | true | | | | | | | | | | \ No newline at end of file From 8d0324db672d507d651b46fd65e9058455a9bb03 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 9 Oct 2024 12:04:20 -0400 Subject: [PATCH 164/183] Small fix --- mlir/lib/Bufferization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md index b8addeccb5..16b0cfd98a 100644 --- a/mlir/lib/Bufferization.md +++ b/mlir/lib/Bufferization.md @@ -2,7 +2,7 @@ | Bufferizable Operations | PrintOp | CustomCallOp | CallbackOp | CallbackCallOp | AdjointOp | BackpropOp | ForwardOp | ReverseOp | QubitUnitaryOp | HermitianOp | HamiltonianOp | SampleOp | StateOp | ProbsOp | CountsOp | SetStateOp | SetBasisStateOp | | --------------------------------| ---------| ------------ | ------------ | -------------- | --------- | ---------- | --------- | --------- | -------------- | ----------- | ------------- | -------- | ------- | ------- | -------- | ---------- | --------------- | -| Catagory | | catalyst | catalyst | catalyst | catalyst | gradient | gradient | gradient | gradient | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | +| Catagory | catalyst | catalyst | catalyst | catalyst | gradient | gradient | gradient | gradient | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | | bufferizesToAllocation | | true | | true | | | | | | | | | | | | | | | bufferizesToMemoryRead | true | true | | false | true | true | | | true | true | true | false | false | false | false | false | false | | bufferizesToMemoryWrite | false | false | | false | false | true | | | false | false | false | false | false | false | false | false | false | From 6dc98b95a641f847feeb0894a18ee1e629e52298 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 9 Oct 2024 12:07:30 -0400 Subject: [PATCH 165/183] Small correction --- mlir/lib/Bufferization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md index 16b0cfd98a..63a14ba49d 100644 --- a/mlir/lib/Bufferization.md +++ b/mlir/lib/Bufferization.md @@ -4,7 +4,7 @@ | --------------------------------| ---------| ------------ | ------------ | -------------- | --------- | ---------- | --------- | --------- | -------------- | ----------- | ------------- | -------- | ------- | ------- | -------- | ---------- | --------------- | | Catagory | catalyst | catalyst | catalyst | catalyst | gradient | gradient | gradient | gradient | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | quantum | | bufferizesToAllocation | | true | | true | | | | | | | | | | | | | | -| bufferizesToMemoryRead | true | true | | false | true | true | | | true | true | true | false | false | false | false | false | false | +| bufferizesToMemoryRead | true | true | | true | true | true | | | true | true | true | false | false | false | false | false | false | | bufferizesToMemoryWrite | false | false | | false | false | true | | | false | false | false | false | false | false | false | false | false | | bufferizesToElementwiseAccess | | | | | | | | | | | | | | | | | | | resultBufferizesToMemoryWrite | | | | | | | | | | | | | | | | | | From 44d4597e17e8bed146822f9116219ccdd3c9d1db Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 09:56:15 -0400 Subject: [PATCH 166/183] Set supportsUnstructuredControlFlow for CallBackOp as False --- mlir/lib/Bufferization.md | 2 +- 
mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md index 63a14ba49d..14eb74599b 100644 --- a/mlir/lib/Bufferization.md +++ b/mlir/lib/Bufferization.md @@ -20,4 +20,4 @@ | isRepetitiveRegion | | | | | | | | | | | | | | | | | | | isParallelRegion | | | | | | | | | | | | | | | | | | | hasTensorSemantics | | | v | | | | v | v | | | | | | | | | | -| supportsUnstructuredControlFlow | | | true | | | | true | true | | | | | | | | | | \ No newline at end of file +| supportsUnstructuredControlFlow | | | false | | | | true | true | | | | | | | | | | \ No newline at end of file diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 7a2e81decc..689b2a2a15 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -150,7 +150,7 @@ struct CustomCallOpInterface struct CallbackOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< CallbackOpInterface, CallbackOp> { - static bool supportsUnstructuredControlFlow() { return true; } + static bool supportsUnstructuredControlFlow() { return false; } bool hasTensorSemantics(Operation *op) const { From 3cecfaff38b289c114678bd1942ad300946dd0c1 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 11:47:23 -0400 Subject: [PATCH 167/183] Remove uncontrolflow from CallBackOp --- .../Transforms/BufferizableOpInterfaceImpl.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 689b2a2a15..4a71807c14 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -24,13 +24,7 @@ namespace { * (if bufferizing in-place). * * `getAliasingOpOperands`: Return the OpResults that may share the same buffer as the given - * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to - * serve the same purpose. - * - * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires - * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the - * implementation of `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and - * `getAliasingOpOperands`. + * OpOperand. 
* * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */ @@ -148,10 +142,8 @@ struct CustomCallOpInterface }; struct CallbackOpInterface - : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - CallbackOpInterface, CallbackOp> { - static bool supportsUnstructuredControlFlow() { return false; } - + : public bufferization::BufferizableOpInterface::ExternalModel { bool hasTensorSemantics(Operation *op) const { auto isaTensor = llvm::IsaPred; From 2cb30e391ee809d3cc33072eceb558e974c84439 Mon Sep 17 00:00:00 2001 From: erick-xanadu <110487834+erick-xanadu@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:55:38 -0400 Subject: [PATCH 168/183] Bufferization comments (#1197) --- frontend/catalyst/compiler.py | 102 ++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 6 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 8b37a6205c..552134df6b 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -221,25 +221,112 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt ], ) +# From: https://mlir.llvm.org/docs/Bufferization/#overview +# +# Preprocessing +# | rewrite_in_destination_passing_style +# | -eliminate-empty-tensors +# Bufferization +# | -one-shot-bufferize +# Buffer-Level +# Optimizations +# | -buffer-hoisting +# | -buffer-loop-hoisting +# | -buffer-results-to-out-params +# | -drop-equivalent-buffer-results +# | -promote-buffers-to-stack +# Deallocation +# | -buffer-deallocation-pipeline + BUFFERIZATION_PASS = ( "BufferizationPass", [ "inline", "gradient-preprocess", - "eliminate-empty-tensors", "convert-elementwise-to-linalg", - "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops " - "function-boundary-type-conversion=identity-layout-map}", - "canonicalize", # Remove dead memrefToTensorOp's - "gradient-postprocess", + "canonicalize", +# Preprocessing: +# rewrite_in_destination_passing_style +# +# We are not rewriting everything in DPS before -one-shot-bufferize +# This was discussed with the main author of the -one-shot-bufferize +# pass and he stated the following: +# +# One-Shot Bufferize was designed for ops that are in DPS (destination-passing style). +# Ops that are not in DPS can still be bufferized, +# but a new buffer will be allocated for every tensor result. +# That’s functionally correct but inefficient. +# +# I’m not sure whether it’s better to first migrate to the new bufferization, +# then turn the ops into DPS ops, or do it the other way around. +# One benefit of implementing the bufferization first is that +# it’s a smaller step that you can already run end-to-end. +# And you can think of the DPS of a performance improvement on top of it. +# +# https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 +# +# Here, please note that gradient-preprocessing is different than rewriting in DPS. +# So, overall, we are skipping this section while we first focus on migrating to the +# new -one-shot-bufferize + "eliminate-empty-tensors", + ( + "one-shot-bufferize" + "{" + "bufferize-function-boundaries " + # - Bufferize function boundaries (experimental). + # + # By default, function boundaries are not bufferized. + # This is because there are currently limitations around function graph bufferization: + # recursive calls are not supported. 
+ # As long as there are no recursive calls, function boundary bufferization can be enabled with bufferize-function-boundaries. + # Each tensor function argument and tensor function result is then turned into a memref. + # The layout map of the memref type can be controlled with function-boundary-type-conversion. + # + # https://mlir.llvm.org/docs/Bufferization/#using-one-shot-bufferize + "allow-return-allocs-from-loops " + # - Allows returning/yielding new allocations from a loop. + # https://github.com/llvm/llvm-project/pull/83964 + # https://github.com/llvm/llvm-project/pull/87594 + "function-boundary-type-conversion=identity-layout-map" + # - Controls layout maps when bufferizing function signatures. + # You can control the memref types at the function boundary with + # function-boundary-type-conversion. E.g., if you set it to identity-layout-map, + # you should get the same type as with --func-bufferize. + # By default, we put a fully dynamic layout map strided<[?, ?], offset: ?> + # because that works best if you don't know what layout map the buffers at + # the call site have -- you can always cast a buffer to a type with + # fully dynamic layout map. (But not the other way around. That may require a reallocation.) + # + # https://discord.com/channels/636084430946959380/642426447167881246/1212338527824515102 + "}" + ), + # Remove dead memrefToTensorOp's # introduced during gradient-bufferize of callbacks + # TODO: Figure out how to remove this. + "gradient-postprocess", "func.func(buffer-hoisting)", "func.func(buffer-loop-hoisting)", + + # TODO: Figure out how to include the other buffer-level optimizations. + # -buffer-results-to-out-params, + # -drop-equivalent-buffer-results, + # -promote-buffers-to-stack + + # Deallocation + # The buffer deallocation pass has been deprecated in favor of the + # ownership-based buffer deallocation pipeline. + # The deprecated pass has some limitations that may cause memory leaks in the resulting IR. + # TODO: Switch to one-shot-bufferization once it is merged. "func.func(buffer-deallocation)", + # catalyst.list_* operations are not bufferized through + # the bufferization interface + # This is because they store a memref inside of a memref + # which is incompatible with the bufferization pipeline. "convert-arraylist-to-memref", "convert-bufferization-to-memref", - "canonicalize", # Must be after convert-bufferization-to-memref + # Must be after convert-bufferization-to-memref # otherwise there are issues in lowering of dynamic tensors. + "canonicalize", # "cse", "cp-global-memref", ], @@ -248,6 +335,9 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt BUFFERIZATION_ASYNC_PASS = ( "BufferizationPass", [ + # TODO: Can we remove copy-before-write? + # copy-before-write: + # Skip the analysis. Make a buffer copy on every write. 
s.replace("}", " copy-before-write}") if s.startswith("one-shot-bufferize") else s for s in BUFFERIZATION_PASS[1] ], From 812bf7f69bc576206eded493f5b55cd625d24b15 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 11:57:29 -0400 Subject: [PATCH 169/183] Entirely remove all unstructure control flow --- mlir/lib/Bufferization.md | 8 +-- .../BufferizableOpInterfaceImpl.cpp | 1 - .../BufferizableOpInterfaceImpl.cpp | 50 ++----------------- 3 files changed, 8 insertions(+), 51 deletions(-) diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md index 14eb74599b..2fefd566c3 100644 --- a/mlir/lib/Bufferization.md +++ b/mlir/lib/Bufferization.md @@ -10,14 +10,14 @@ | resultBufferizesToMemoryWrite | | | | | | | | | | | | | | | | | | | mustBufferizeInPlace | | | | | | | | | | | | | | | | | | | getAliasingValues | {} | {} | | {} | {} | {} | | | {} | {} | {} | {} | {} | {} | {} | {} | {} | -| getAliasingOpOperands | | | {} | | | | v | v | | | | | | | | | | +| getAliasingOpOperands | | | {} | | | | {} | {} | | | | | | | | | | | resolveConflicts | | | | | | | | | | | | | | | | | | | bufferize | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | | isWritable | | | | | | | | | | | | | | | | | | | isNotConflicting | | | | | | | | | | | | | | | | | | -| verifyAnalysis | | | | | | | v | v | | | | | | | | | | -| getBufferType | | | | | | | v | v | | | | | | | | | | +| verifyAnalysis | | | | | | | | | | | | | | | | | | +| getBufferType | | | | | | | | | | | | | | | | | | | isRepetitiveRegion | | | | | | | | | | | | | | | | | | | isParallelRegion | | | | | | | | | | | | | | | | | | | hasTensorSemantics | | | v | | | | v | v | | | | | | | | | | -| supportsUnstructuredControlFlow | | | false | | | | true | true | | | | | | | | | | \ No newline at end of file +| supportsUnstructuredControlFlow | | | | | | | | | | | | | | | | | | \ No newline at end of file diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 4a71807c14..8411b9bf78 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,6 +1,5 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index ce55763343..9bad418c0c 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,14 +1,10 @@ #include "mlir/Conversion/LLVMCommon/MemRefBuilder.h" -#include "mlir/Conversion/LLVMCommon/Pattern.h" -#include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include 
"mlir/IR/BuiltinTypes.h" #include "mlir/Interfaces/FunctionInterfaces.h" @@ -17,7 +13,6 @@ #include "Gradient/IR/GradientOps.h" #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" #include "Gradient/Utils/GradientShape.h" -#include "Quantum/IR/QuantumOps.h" #include "llvm/ADT/STLExtras.h" using namespace mlir; @@ -37,11 +32,6 @@ namespace { * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to * serve the same purpose. * - * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires - * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the - * implementation of `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and - * `getAliasingOpOperands`. - * * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */ @@ -292,8 +282,7 @@ struct BackpropOpInterface }; struct ForwardOpInterface - : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - ForwardOpInterface, ForwardOp> { + : public bufferization::BufferizableOpInterface::ExternalModel { static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const @@ -316,22 +305,7 @@ struct ForwardOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return getAliasingBranchOpOperands(op, cast(value), state); - } - - FailureOr getBufferType(Operation *op, Value value, - const bufferization::BufferizationOptions &options, - SmallVector &invocationStack) const - { - auto forwardOp = cast(op); - auto bbArg = cast(value); - - // Function arguments are special. - if (bbArg.getOwner() == &forwardOp.getBody().front()) - return getBufferizedFunctionArgType(forwardOp, bbArg.getArgNumber(), options); - - return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( - op, value, options, invocationStack); + return {}; } LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const @@ -413,8 +387,7 @@ struct ForwardOpInterface }; struct ReverseOpInterface - : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< - ReverseOpInterface, ReverseOp> { + : public bufferization::BufferizableOpInterface::ExternalModel { static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const @@ -437,22 +410,7 @@ struct ReverseOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return getAliasingBranchOpOperands(op, cast(value), state); - } - - FailureOr getBufferType(Operation *op, Value value, - const bufferization::BufferizationOptions &options, - SmallVector &invocationStack) const - { - auto reverseOp = cast(op); - auto bbArg = cast(value); - - // Function arguments are special. 
- if (bbArg.getOwner() == &reverseOp.getBody().front()) - return getBufferizedFunctionArgType(reverseOp, bbArg.getArgNumber(), options); - - return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( - op, value, options, invocationStack); + return {}; } LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const From c76698a8a51e0034308dc7a045cb652772c57bc1 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 11:58:41 -0400 Subject: [PATCH 170/183] Reformatting --- frontend/catalyst/compiler.py | 106 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 552134df6b..20828f4c05 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -245,60 +245,60 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-preprocess", "convert-elementwise-to-linalg", "canonicalize", -# Preprocessing: -# rewrite_in_destination_passing_style -# -# We are not rewriting everything in DPS before -one-shot-bufferize -# This was discussed with the main author of the -one-shot-bufferize -# pass and he stated the following: -# -# One-Shot Bufferize was designed for ops that are in DPS (destination-passing style). -# Ops that are not in DPS can still be bufferized, -# but a new buffer will be allocated for every tensor result. -# That’s functionally correct but inefficient. -# -# I’m not sure whether it’s better to first migrate to the new bufferization, -# then turn the ops into DPS ops, or do it the other way around. -# One benefit of implementing the bufferization first is that -# it’s a smaller step that you can already run end-to-end. -# And you can think of the DPS of a performance improvement on top of it. -# -# https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 -# -# Here, please note that gradient-preprocessing is different than rewriting in DPS. -# So, overall, we are skipping this section while we first focus on migrating to the -# new -one-shot-bufferize + # Preprocessing: + # rewrite_in_destination_passing_style + # + # We are not rewriting everything in DPS before -one-shot-bufferize + # This was discussed with the main author of the -one-shot-bufferize + # pass and he stated the following: + # + # One-Shot Bufferize was designed for ops that are in DPS (destination-passing style). + # Ops that are not in DPS can still be bufferized, + # but a new buffer will be allocated for every tensor result. + # That’s functionally correct but inefficient. + # + # I’m not sure whether it’s better to first migrate to the new bufferization, + # then turn the ops into DPS ops, or do it the other way around. + # One benefit of implementing the bufferization first is that + # it’s a smaller step that you can already run end-to-end. + # And you can think of the DPS of a performance improvement on top of it. + # + # https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 + # + # Here, please note that gradient-preprocessing is different than rewriting in DPS. + # So, overall, we are skipping this section while we first focus on migrating to the + # new -one-shot-bufferize "eliminate-empty-tensors", ( - "one-shot-bufferize" - "{" - "bufferize-function-boundaries " - # - Bufferize function boundaries (experimental). - # - # By default, function boundaries are not bufferized. 
- # This is because there are currently limitations around function graph bufferization: - # recursive calls are not supported. - # As long as there are no recursive calls, function boundary bufferization can be enabled with bufferize-function-boundaries. - # Each tensor function argument and tensor function result is then turned into a memref. - # The layout map of the memref type can be controlled with function-boundary-type-conversion. - # - # https://mlir.llvm.org/docs/Bufferization/#using-one-shot-bufferize - "allow-return-allocs-from-loops " - # - Allows returning/yielding new allocations from a loop. - # https://github.com/llvm/llvm-project/pull/83964 - # https://github.com/llvm/llvm-project/pull/87594 - "function-boundary-type-conversion=identity-layout-map" - # - Controls layout maps when bufferizing function signatures. - # You can control the memref types at the function boundary with - # function-boundary-type-conversion. E.g., if you set it to identity-layout-map, - # you should get the same type as with --func-bufferize. - # By default, we put a fully dynamic layout map strided<[?, ?], offset: ?> - # because that works best if you don't know what layout map the buffers at - # the call site have -- you can always cast a buffer to a type with - # fully dynamic layout map. (But not the other way around. That may require a reallocation.) - # - # https://discord.com/channels/636084430946959380/642426447167881246/1212338527824515102 - "}" + "one-shot-bufferize" + "{" + "bufferize-function-boundaries " + # - Bufferize function boundaries (experimental). + # + # By default, function boundaries are not bufferized. + # This is because there are currently limitations around function graph bufferization: + # recursive calls are not supported. + # As long as there are no recursive calls, function boundary bufferization can be enabled with bufferize-function-boundaries. + # Each tensor function argument and tensor function result is then turned into a memref. + # The layout map of the memref type can be controlled with function-boundary-type-conversion. + # + # https://mlir.llvm.org/docs/Bufferization/#using-one-shot-bufferize + "allow-return-allocs-from-loops " + # - Allows returning/yielding new allocations from a loop. + # https://github.com/llvm/llvm-project/pull/83964 + # https://github.com/llvm/llvm-project/pull/87594 + "function-boundary-type-conversion=identity-layout-map" + # - Controls layout maps when bufferizing function signatures. + # You can control the memref types at the function boundary with + # function-boundary-type-conversion. E.g., if you set it to identity-layout-map, + # you should get the same type as with --func-bufferize. + # By default, we put a fully dynamic layout map strided<[?, ?], offset: ?> + # because that works best if you don't know what layout map the buffers at + # the call site have -- you can always cast a buffer to a type with + # fully dynamic layout map. (But not the other way around. That may require a reallocation.) + # + # https://discord.com/channels/636084430946959380/642426447167881246/1212338527824515102 + "}" ), # Remove dead memrefToTensorOp's # introduced during gradient-bufferize of callbacks @@ -306,12 +306,10 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt "gradient-postprocess", "func.func(buffer-hoisting)", "func.func(buffer-loop-hoisting)", - # TODO: Figure out how to include the other buffer-level optimizations. 
# -buffer-results-to-out-params, # -drop-equivalent-buffer-results, # -promote-buffers-to-stack - # Deallocation # The buffer deallocation pass has been deprecated in favor of the # ownership-based buffer deallocation pipeline. From 5b488b2eb484153d4ba3d7dc2e209b15c2278d15 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 13:38:49 -0400 Subject: [PATCH 171/183] Revert "Entirely remove all unstructure control flow" This reverts commit 812bf7f69bc576206eded493f5b55cd625d24b15. --- mlir/lib/Bufferization.md | 8 +-- .../BufferizableOpInterfaceImpl.cpp | 1 + .../BufferizableOpInterfaceImpl.cpp | 50 +++++++++++++++++-- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Bufferization.md b/mlir/lib/Bufferization.md index 2fefd566c3..14eb74599b 100644 --- a/mlir/lib/Bufferization.md +++ b/mlir/lib/Bufferization.md @@ -10,14 +10,14 @@ | resultBufferizesToMemoryWrite | | | | | | | | | | | | | | | | | | | mustBufferizeInPlace | | | | | | | | | | | | | | | | | | | getAliasingValues | {} | {} | | {} | {} | {} | | | {} | {} | {} | {} | {} | {} | {} | {} | {} | -| getAliasingOpOperands | | | {} | | | | {} | {} | | | | | | | | | | +| getAliasingOpOperands | | | {} | | | | v | v | | | | | | | | | | | resolveConflicts | | | | | | | | | | | | | | | | | | | bufferize | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | v | | isWritable | | | | | | | | | | | | | | | | | | | isNotConflicting | | | | | | | | | | | | | | | | | | -| verifyAnalysis | | | | | | | | | | | | | | | | | | -| getBufferType | | | | | | | | | | | | | | | | | | +| verifyAnalysis | | | | | | | v | v | | | | | | | | | | +| getBufferType | | | | | | | v | v | | | | | | | | | | | isRepetitiveRegion | | | | | | | | | | | | | | | | | | | isParallelRegion | | | | | | | | | | | | | | | | | | | hasTensorSemantics | | | v | | | | v | v | | | | | | | | | | -| supportsUnstructuredControlFlow | | | | | | | | | | | | | | | | | | \ No newline at end of file +| supportsUnstructuredControlFlow | | | false | | | | true | true | | | | | | | | | | \ No newline at end of file diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 8411b9bf78..4a71807c14 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,5 +1,6 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 9bad418c0c..ce55763343 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -1,10 +1,14 @@ #include "mlir/Conversion/LLVMCommon/MemRefBuilder.h" +#include "mlir/Conversion/LLVMCommon/Pattern.h" +#include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include 
"mlir/Dialect/Index/IR/IndexOps.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/Interfaces/FunctionInterfaces.h" @@ -13,6 +17,7 @@ #include "Gradient/IR/GradientOps.h" #include "Gradient/Transforms/BufferizableOpInterfaceImpl.h" #include "Gradient/Utils/GradientShape.h" +#include "Quantum/IR/QuantumOps.h" #include "llvm/ADT/STLExtras.h" using namespace mlir; @@ -32,6 +37,11 @@ namespace { * OpOperand. Note that MLIR documentation does not mention `getAliasingValues` but it seems to * serve the same purpose. * + * Bufferizing FunctionOpInterface is also not documented by MLIR. It requires + * `OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel`, which requires the + * implementation of `supportsUnstructuredControlFlow`, `hasTensorSemantics`, and + * `getAliasingOpOperands`. + * * Link: https://mlir.llvm.org/docs/Bufferization/#extending-one-shot-bufferize */ @@ -282,7 +292,8 @@ struct BackpropOpInterface }; struct ForwardOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< + ForwardOpInterface, ForwardOp> { static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const @@ -305,7 +316,22 @@ struct ForwardOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return {}; + return getAliasingBranchOpOperands(op, cast(value), state); + } + + FailureOr getBufferType(Operation *op, Value value, + const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const + { + auto forwardOp = cast(op); + auto bbArg = cast(value); + + // Function arguments are special. + if (bbArg.getOwner() == &forwardOp.getBody().front()) + return getBufferizedFunctionArgType(forwardOp, bbArg.getArgNumber(), options); + + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( + op, value, options, invocationStack); } LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const @@ -387,7 +413,8 @@ struct ForwardOpInterface }; struct ReverseOpInterface - : public bufferization::BufferizableOpInterface::ExternalModel { + : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< + ReverseOpInterface, ReverseOp> { static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const @@ -410,7 +437,22 @@ struct ReverseOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return {}; + return getAliasingBranchOpOperands(op, cast(value), state); + } + + FailureOr getBufferType(Operation *op, Value value, + const bufferization::BufferizationOptions &options, + SmallVector &invocationStack) const + { + auto reverseOp = cast(op); + auto bbArg = cast(value); + + // Function arguments are special. 
+ if (bbArg.getOwner() == &reverseOp.getBody().front()) + return getBufferizedFunctionArgType(reverseOp, bbArg.getArgNumber(), options); + + return OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel::getBufferType( + op, value, options, invocationStack); } LogicalResult verifyAnalysis(Operation *op, const bufferization::AnalysisState &state) const From 3646328868f4f23615aefdc09610711147e62008 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Thu, 10 Oct 2024 14:00:54 -0400 Subject: [PATCH 172/183] set supportsUnstructuredControlFlow for forwardOp and reverseOp to false --- .../Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index ce55763343..dc130a295f 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -294,7 +294,7 @@ struct BackpropOpInterface struct ForwardOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< ForwardOpInterface, ForwardOp> { - static bool supportsUnstructuredControlFlow() { return true; } + static bool supportsUnstructuredControlFlow() { return false; } bool hasTensorSemantics(Operation *op) const { @@ -316,7 +316,7 @@ struct ForwardOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return getAliasingBranchOpOperands(op, cast(value), state); + return {}; } FailureOr getBufferType(Operation *op, Value value, @@ -415,7 +415,7 @@ struct ForwardOpInterface struct ReverseOpInterface : public bufferization::OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel< ReverseOpInterface, ReverseOp> { - static bool supportsUnstructuredControlFlow() { return true; } + static bool supportsUnstructuredControlFlow() { return false; } bool hasTensorSemantics(Operation *op) const { @@ -437,7 +437,7 @@ struct ReverseOpInterface getAliasingOpOperands(Operation *op, Value value, const bufferization::AnalysisState &state) const { - return getAliasingBranchOpOperands(op, cast(value), state); + return {}; } FailureOr getBufferType(Operation *op, Value value, From 938df918e4cec86e3e71fd269f2868eb84b22e38 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 11 Oct 2024 16:20:29 -0400 Subject: [PATCH 173/183] Update mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp Co-authored-by: erick-xanadu <110487834+erick-xanadu@users.noreply.github.com> --- mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index fe11fcc64a..76349ec74d 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -321,7 +321,7 @@ struct SetStateOpInterface bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, From 6c7791635180baef38cde7b138bcb8f6612d6273 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Fri, 11 Oct 2024 16:20:38 -0400 Subject: [PATCH 174/183] Update mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp Co-authored-by: 
erick-xanadu <110487834+erick-xanadu@users.noreply.github.com> --- mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp index 76349ec74d..2e61dfe579 100644 --- a/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Quantum/Transforms/BufferizableOpInterfaceImpl.cpp @@ -362,7 +362,7 @@ struct SetBasisStateOpInterface bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; } bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, From d1c0bde78fbbbeb9762dd750eb183f4ccb488a8c Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Thu, 10 Oct 2024 19:10:48 -0400 Subject: [PATCH 175/183] wip --- frontend/catalyst/compiler.py | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 20828f4c05..14b9e2a6eb 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -268,8 +268,57 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt # Here, please note that gradient-preprocessing is different than rewriting in DPS. # So, overall, we are skipping this section while we first focus on migrating to the # new -one-shot-bufferize + "eliminate-empty-tensors", + ( + # Before we enter one-shot-bufferize, here is what we expect: + # * Given + # + # One-Shot Bufferize was designed for ops that are in DPS (destination-passing style). + # Ops that are not in DPS can still be bufferized, + # but a new buffer will be allocated for every tensor result. + # That’s functionally correct but inefficient. + # + # from: https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 + # we expect that results will be (automatically?) converted into new buffers. And it is + # up to us to just define the bufferization for the operands. + # + # So what is the state of the catalyst, gradient, quantum dialects at this point? + # + # Let's start with quantum: + # + # |-------------------------|--------------------| + # | operation | has result tensor | + # |-------------------------|--------------------| + # | quantum.set_state | | + # | quantum.set_basis_state | | + # | quantum.unitary | | + # | quantum.hermitian | | + # | quantum.hamiltonian | | + # | quantum.sample_op | YES | + # | quantum.counts_op | YES | + # | quantum.probs_op | YES | + # | quantum.state_op | YES | + # |-------------------------|--------------------| + # | catalyst.print_op | | + # | catalyst.custom_call | YES | + # | catalyst.callback | | + # | catalyst.callback_call | YES | + # | catalyst.launch_kernel | YES | + # |-------------------------|--------------------| + # | gradient.grad | YES | + # | gradient.value_and_grad | YES | + # | gradient.adjoint | YES | + # | gradient.backprop | YES | + # | gradient.jvp | YES | + # | gradient.vjp | YES | + # | gradient.forward | YES | + # | gradient.reverse | YES | + # |-------------------------|--------------------| + # + # So what this means is that, for the operations marked YES above, + # no operands are written to. They are only read.
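+ # + # (Editorial aside, schematic only and not pipeline input: in upstream MLIR, a DPS op such as + # %r = linalg.matmul ins(%a, %b : tensor<4x4xf32>, tensor<4x4xf32>) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> + # can bufferize its result into the buffer of %init, whereas the non-DPS ops marked YES above + # each get a fresh allocation for every tensor result.)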
"one-shot-bufferize" "{" "bufferize-function-boundaries " From 12bf6186cb99a0bd5d68c74d38c524ac7d38d7b3 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Fri, 11 Oct 2024 14:45:11 -0400 Subject: [PATCH 176/183] Comments --- .../BufferizableOpInterfaceImpl.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp index 4a71807c14..c9033716d5 100644 --- a/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Catalyst/Transforms/BufferizableOpInterfaceImpl.cpp @@ -31,6 +31,7 @@ namespace { /// Bufferization of catalyst.print. Get memref of printOp.val. struct PrintOpInterface + // PrintOp will never write to the buffers : public bufferization::BufferizableOpInterface::ExternalModel { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const @@ -68,8 +69,16 @@ struct PrintOpInterface /// Bufferization of catalyst.custom_call. Mainly get buffers for arguments. struct CustomCallOpInterface + // CustomCallOp will interface with BLAS functions. + // This operations is not in DPS form. This means that + // if we can guarantee operands are never written to, then we can set + // bufferizesToMemoryWrite as false. + // Results will be allocated a new buffer. + // TODO: Double check BLAS and others. Until then, it should be safe to keep + // bufferizesToMemoryWrite as True. : public bufferization::BufferizableOpInterface::ExternalModel { + bool bufferizesToAllocation(Operation *op, Value value) const { return true; } bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, @@ -81,7 +90,7 @@ struct CustomCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return false; + return true; } bufferization::AliasingValueList @@ -205,9 +214,12 @@ struct CallbackCallOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - // The method returns `true` if the given OpOperand bufferizes to a memory write. - // Since CallbackCallOp is related to print, which does not write to its operand, - // Maybe we can set this to `false`. + // We can safely say false because CallbackCallOp's memrefs + // will be put in a JAX array and JAX arrays are immutable. + // + // Unlike NumPy arrays, JAX arrays are always immutable. + // + // https://jax.readthedocs.io/en/latest/notebooks/thinking_in_jax.html return false; } From 3c0bb0a2a4b89c38af951990b1c5b9397f8546e0 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Fri, 11 Oct 2024 15:08:54 -0400 Subject: [PATCH 177/183] f --- .../BufferizableOpInterfaceImpl.cpp | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index dc130a295f..6fb383c7c9 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -83,6 +83,16 @@ Value generateAllocation(OpBuilder &builder, Location loc, Value reference) { auto origMemrefType = cast(reference.getType()); // Rebuild MemRefType without memory layout. + // TODO: Investigate + // + // Something looks odd here. + // The result of a `memref.alloc` should be a memref without identity layout. 
+ // I know that the op supports operands for dims/symbols in the memref type, + // but I never understood why. + // Imo, a `memref.alloc() : memref` should have been generated. + // The result value can then be casted to `memref>`. + // + // https://discord.com/channels/636084430946959380/642426447167881246/1281710682160627785 auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); // Get dynamic dimension sizes from the provided reference value if necessary. SmallVector<Value> dynamicDims; @@ -112,6 +122,7 @@ void generateAllocations(RewriterBase &rewriter, Location loc, SmallVectorImpl { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const @@ -169,6 +180,13 @@ struct AdjointOpInterface }; struct BackpropOpInterface + // This operation is not in DPS style, + // but it has a lot of parameters, notably: + // Variadic: $args + // Variadic<...RankedTensorOf<[AnyFloat]>>: $cotangents + // I think we don't write to the cotangents, and also not to the arguments, + // so we can set bufferizesToMemoryWrite to false. + // The safe assumption is that it should be true. : public bufferization::BufferizableOpInterface::ExternalModel<BackpropOpInterface, BackpropOp> { bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, @@ -180,7 +198,7 @@ struct BackpropOpInterface bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, const bufferization::AnalysisState &state) const { - return true; + return false; } bufferization::AliasingValueList @@ -543,4 +561,4 @@ void catalyst::gradient::registerBufferizableOpInterfaceExternalModels(DialectRe ForwardOp::attachInterface<ForwardOpInterface>(*ctx); ReverseOp::attachInterface<ReverseOpInterface>(*ctx); }); -} \ No newline at end of file +} From 619bf3d6875fa03a1a3062e1c86936f72cee0054 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 12:59:48 -0400 Subject: [PATCH 178/183] More comments --- .../BufferizableOpInterfaceImpl.cpp | 59 +++++++++++++++++-- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 6fb383c7c9..0cc45a056c 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -82,8 +82,22 @@ static ReturnOp getAssumedUniqueReturnOp(FunctionOpInterface funcOp) Value generateAllocation(OpBuilder &builder, Location loc, Value reference) { auto origMemrefType = cast<MemRefType>(reference.getType()); - // Rebuild MemRefType without memory layout. - // TODO: Investigate + // TODO: Investigate how to get rid of identity-layout-map + // + // Hi all. For one-shot-bufferization, is there any automatic way to pass all memref symbols + // to AllocOp? we have an example below that triggers error: 'memref.alloc' op symbol + // operand count does not equal memref symbol count: expected 1, got 0 . We think we have + // to pass the offset symbol to AllocOp. + // + // %0 = "bufferization.to_memref"(%arg0) : (tensor) -> memref> %1 = "memref.alloc"() <{operandSegmentSizes = array}> : () -> + // memref> + // + // We know we can set function-signature-type-conversion=identity-layout-map to get rid of + // it. But according to the document, identity-layout-map could be less efficient, we still + // want to stick with the default setting. + // + // https://discord.com/channels/636084430946959380/642426447167881246/1281620504859512914 // // Something looks odd here.
// The result of a `memref.alloc` should be a memref without identity layout. @@ -93,8 +107,42 @@ Value generateAllocation(OpBuilder &builder, Location loc, Value reference) // The result value can then be casted to `memref>`. // // https://discord.com/channels/636084430946959380/642426447167881246/1281710682160627785 - auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); - // Get dynamic dimension sizes from the provided reference value if necessary. + // + // What I find interesting is that the comment says that + // + // "we know we can set function-signature-type-conversion=identity-layout-map to get rid of + // it" + // + // and that is what we are using, however we still have this rebuilding a memref without the + // layout. If that were true, then we could uncomment the following line and it should work. + auto memrefType = origMemrefType; + // I can confirm that having + // function-signature-type-conversion=identity-layout-map makes the line above succeed while the + // line below fails: + // + // Get dynamic dimension sizes from the provided reference value if necessary. + // auto memrefType = MemRefType::get(origMemrefType.getShape(), + // origMemrefType.getElementType()); + // + // Looking at this a little bit deeper, I can say that the variable reference + // appears to come from a function parameter. + // And since it is not the identity layout, we see the following generic MLIR when not + // using identity layout + // + // "func.func"() <{function_type = (memref>) -> memref> + // + // and we see this when using the identity layout: + // + // func.func public @jit_fn(%arg0: memref) -> memref + // + // When not using identity layout but also not removing the layout in the alloca, there are + // errors in some cases but not in others. I believe we have to do some casts in other places as + // well, whenever we use allocas and the types come from the arguments. + // + // My recommendation: at some point it would be good to remove the identity-layout-map from the + // frontend but until we have some more resources, let's keep it along with the origMemrefType. SmallVector<Value> dynamicDims; if (!memrefType.hasStaticShape()) { for (int64_t dim = 0; dim < memrefType.getRank(); dim++) { @@ -106,6 +154,9 @@ Value generateAllocation(OpBuilder &builder, Location loc, Value reference) } return builder.create<memref::AllocOp>(loc, memrefType, dynamicDims); + // Uncomment below to follow Matthias' suggestion of placing a CastOp after AllocOp; + // some more tests will pass. + // return builder.create<memref::CastOp>(loc, origMemrefType, alloc_uncasted); } /// Helper function to generate a set of memref allocations. From b49881e8de89cc02c4eccf8bf9163d5b26ebfba9 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 14:07:20 -0400 Subject: [PATCH 179/183] Fix comments --- .../Transforms/PostprocessingPatterns.cpp | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index cfef5c9aae..21af67294d 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -210,21 +210,26 @@ struct RestoreReverseOp : public OpRewritePattern<ReverseOp> { * This pass aims to revert the changed made by One-shot bufferize.
* * For example, - * ``` - * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref attributes {argc = - * 2 : i64, implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape - * = 0 : i64} { %0 = func.call @bwd(%arg0) : (memref) -> memref %alloc = - * memref.alloc() {alignment = 64 : i64} : memref memref.copy %0, %alloc : memref to - * memref gradient.return {empty = true} %alloc : memref + * + * ```mlir + * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref + * attributes {argc = 2 : i64, implementation = @bwd, resc = 1 : i64, tape = 0 : i64} { + * %0 = func.call @bwd(%arg0) : (memref) -> memref + * %alloc = memref.alloc() {alignment = 64 : i64} : memref + * memref.copy %0, %alloc : memref to memref + * gradient.return {empty = true} %alloc : memref * } * ``` + * * will be turned into - * * ``` - * gradient.reverse @bwd.rev(%arg0: memref) -> memref attributes {argc = 2 : i64, - * implementation = @bwd, llvm.linkage = #llvm.linkage, resc = 1 : i64, tape = 0 : - * i64} { %0 = func.call @bwd(%arg0) : (memref) -> memref %alloc = memref.alloc() - * {alignment = 64 : i64} : memref memref.copy %0, %alloc : memref to memref - * gradient.return {empty = true} %alloc : memref + * + * ```mlir + * gradient.reverse @bwd.rev(%arg0: memref) -> memref + * attributes {argc = 2 : i64, implementation = @bwd, resc = 1 : i64, tape = 0 : i64} { + * %0 = func.call @bwd(%arg0) : (memref) -> memref + * %alloc = memref.alloc() {alignment = 64 : i64} : memref + * memref.copy %0, %alloc : memref to memref + * gradient.return {empty = true} %alloc : memref * } * ``` * @@ -301,4 +306,4 @@ void populatePostprocessingPatterns(RewritePatternSet &patterns) } } // namespace gradient -} // namespace catalyst \ No newline at end of file +} // namespace catalyst From 9bda879ab4568551aa666d50d873c328164274ba Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 15:08:16 -0400 Subject: [PATCH 180/183] Remove unnecessary code --- .../Transforms/PostprocessingPatterns.cpp | 89 ------------------- 1 file changed, 89 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp index 21af67294d..16c0243352 100644 --- a/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp +++ b/mlir/lib/Gradient/Transforms/PostprocessingPatterns.cpp @@ -205,94 +205,6 @@ struct PostprocessReverseOp : public OpRewritePattern { } }; -struct RestoreReverseOp : public OpRewritePattern { - /* One-shot bufferize optimizes away the return values that are not used. - * This pass aims to revert the changed made by One-shot bufferize. - * - * For example, - * - * ```mlir - * gradient.reverse @bwd.rev(%arg0: memref) -> memref, memref - * attributes {argc = 2 : i64, implementation = @bwd, resc = 1 : i64, tape = 0 : i64} { - * %0 = func.call @bwd(%arg0) : (memref) -> memref - * %alloc = memref.alloc() {alignment = 64 : i64} : memref - * memref.copy %0, %alloc : memref to memref - * gradient.return {empty = true} %alloc : memref - * } - * ``` - * - * will be turned into - * - * ```mlir - * gradient.reverse @bwd.rev(%arg0: memref) -> memref - * attributes {argc = 2 : i64, implementation = @bwd, resc = 1 : i64, tape = 0 : i64} { - * %0 = func.call @bwd(%arg0) : (memref) -> memref - * %alloc = memref.alloc() {alignment = 64 : i64} : memref - * memref.copy %0, %alloc : memref to memref - * gradient.return {empty = true} %alloc : memref - * } - * ``` - * - * However, Enzyme expects to see the removed return. We have to add it back. 
- */ - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult matchAndRewrite(ReverseOp op, - mlir::PatternRewriter &rewriter) const override - { - auto forwardArgc = op.getArgc(); - auto forwardResc = op.getResc(); - auto tape = op.getTape(); - - // Check if the Op is post-processed. - if (op.getFunctionType().getNumInputs() == (forwardResc + forwardArgc) * 2 + tape) - return failure(); - - // If function signature is modified, this pass cannot be processed. - if (op.getFunctionType().getNumResults() >= forwardArgc) - return failure(); - - // get parenet module - auto module = op->getParentOfType(); - - // Get GradOp - CustomGradOp gradCaller = nullptr; - for (auto gradOp : module.getOps()) { - if (gradOp.getReverse() == op.getSymName()) { - gradCaller = gradOp; - } - } - - if (!gradCaller) - return failure(); - - ForwardOp target = nullptr; - // get corresponding FowardOp - for (auto forwardOp : module.getOps()) { - if (forwardOp.getSymName() == gradCaller.getForward()) { - target = forwardOp; - } - } - - if (!target) - return failure(); - - auto forwardArgTys = target.getArgumentTypes(); - SmallVector noTapeTys; - for (size_t i = 0; i < forwardArgTys.size(); ++i) { - if (i < op.getArgc()) { - noTapeTys.push_back(forwardArgTys[i]); - } - } - - auto reverseTy = rewriter.getFunctionType(op.getArgumentTypes(), noTapeTys); - - rewriter.modifyOpInPlace(op, [&] { op.setFunctionType(reverseTy); }); - - return failure(); - } -}; - } // namespace namespace catalyst { @@ -300,7 +212,6 @@ namespace gradient { void populatePostprocessingPatterns(RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); patterns.add(patterns.getContext()); patterns.add(patterns.getContext()); } From 227c08e9b9886e6cf6e8101213cbefedd68322c1 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 15:10:28 -0400 Subject: [PATCH 181/183] style --- frontend/catalyst/compiler.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index 14b9e2a6eb..aed9cd131f 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -268,9 +268,7 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt # Here, please note that gradient-preprocessing is different than rewriting in DPS. # So, overall, we are skipping this section while we first focus on migrating to the # new -one-shot-bufferize - "eliminate-empty-tensors", - ( # Before we enter one-shot-bufferize, here is what we expect: # * Given From b179e70357368fbdb1914597c720e1305c969e5d Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 15:30:26 -0400 Subject: [PATCH 182/183] Fix --- mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp index 0cc45a056c..99b1b4396b 100644 --- a/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Gradient/Transforms/BufferizableOpInterfaceImpl.cpp @@ -115,14 +115,13 @@ Value generateAllocation(OpBuilder &builder, Location loc, Value reference) // // and that is what we are using, however we still have this rebuilding a memref without the // layout. If that were true, then we could uncomment the following line and it should work. 
- auto memrefType = origMemrefType; + // auto memrefType = origMemrefType; // I can confirm that having // function-signature-type-conversion=identity-layout-map makes the line above succeed while the // line below fails: // // Get dynamic dimension sizes from the provided reference value if necessary. - // auto memrefType = MemRefType::get(origMemrefType.getShape(), - // origMemrefType.getElementType()); + auto memrefType = MemRefType::get(origMemrefType.getShape(), origMemrefType.getElementType()); // // Looking at this a little bit deeper, I can say that the variable reference // appears to come from a function parameter. From e4235d756e337315eb83a89e039a1214b87ae2b5 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 15 Oct 2024 15:30:34 -0400 Subject: [PATCH 183/183] line length --- frontend/catalyst/compiler.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/frontend/catalyst/compiler.py b/frontend/catalyst/compiler.py index aed9cd131f..bd580b3950 100644 --- a/frontend/catalyst/compiler.py +++ b/frontend/catalyst/compiler.py @@ -273,14 +273,16 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt # Before we enter one-shot-bufferize, here is what we expect: # * Given # - # One-Shot Bufferize was designed for ops that are in DPS (destination-passing style). + # One-Shot Bufferize was designed for ops that are in DPS + # (destination-passing style). # Ops that are not in DPS can still be bufferized, # but a new buffer will be allocated for every tensor result. # That’s functionally correct but inefficient. # - # from: https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 - # we expect that results will be (automatically?) converted into new buffers. And it is - # up to us to just define the bufferization for the operands. + # https://discourse.llvm.org/t/steps-of-migrating-to-one-shot-bufferization/81062/2 + # + # we expect that results will be (automatically?) converted into new buffers. And it + # is up to us to just define the bufferization for the operands. # # So what is the state of the catalyst, gradient, quantum dialects at this point? # @@ -323,9 +325,11 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt # - Bufferize function boundaries (experimental). # # By default, function boundaries are not bufferized. - # This is because there are currently limitations around function graph bufferization: + # This is because there are currently limitations around function graph + # bufferization: # recursive calls are not supported. - # As long as there are no recursive calls, function boundary bufferization can be enabled with bufferize-function-boundaries. + # As long as there are no recursive calls, function boundary bufferization can be + # enabled with bufferize-function-boundaries. # Each tensor function argument and tensor function result is then turned into a memref. # The layout map of the memref type can be controlled with function-boundary-type-conversion. # @@ -342,7 +346,8 @@ def run_writing_command(command: List[str], compile_options: Optional[CompileOpt # By default, we put a fully dynamic layout map strided<[?, ?], offset: ?> # because that works best if you don't know what layout map the buffers at # the call site have -- you can always cast a buffer to a type with - # fully dynamic layout map. (But not the other way around. That may require a reallocation.) + # fully dynamic layout map. (But not the other way around.
That may require a + # reallocation.) # # https://discord.com/channels/636084430946959380/642426447167881246/1212338527824515102 "}"
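+ # (Note: the adjacent string literals above are concatenated by the Python parser, so this whole block reaches the pass pipeline as the single entry + # "one-shot-bufferize{bufferize-function-boundaries allow-return-allocs-from-loops function-boundary-type-conversion=identity-layout-map}".)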