Since compiler-generated stick/unstick is on by default, replace the enable option with a disable option (#3073)

AlexandreEichenberger · web-flow · commit 86b8f18e72ba · 2025-02-13T18:27:11.000-05:00
Signed-off-by: Alexandre Eichenberger <alexe@us.ibm.com>
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
@@ -49,11 +49,11 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(
 
 // Enabled default now, could also enable it only if parallel is on as parallel
 // stick/unstick is quite a bit faster than sequential.
-llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
-    "enable-compiler-stick-unstick",
-    llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
-                   "stick/unstick code. Default is true."),
-    llvm::cl::init(true), llvm::cl::cat(OnnxMlirCommonOptions));
+llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick(
+    "disable-compiler-stick-unstick",
+    llvm::cl::desc("Disable the compiler to generate some "
+                   "stick/unstick code. Default is false."),
+    llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));
 
 llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
     "nnpa-enable-scalar-bcast-binary",
@@ -94,7 +94,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{
 
 llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
     llvm::cl::desc("Enable saturating f32 values before stickify them."
-                   "This option turns enable-compiler-stick-unstick on."
+                   "This option turns off disable-compiler-stick-unstick."
                    "Default is false."),
     llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));
 
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
@@ -69,7 +69,7 @@ extern llvm::cl::OptionCategory OnnxMlirCommonOptions;
 extern llvm::cl::opt<onnx_mlir::NNPAEmissionTargetType> nnpaEmissionTarget;
 extern llvm::cl::opt<bool> nnpaDisableZHighToOnnx;
 extern llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick;
-extern llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick;
+extern llvm::cl::opt<bool> nnpaDisableCompilerStickUnstick;
 extern llvm::cl::opt<bool> nnpaEnableScalarBcastBinary;
 extern llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic;
 extern llvm::cl::opt<bool> profileZHighIR;
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
@@ -52,7 +52,7 @@ void configurePassesNNPA() {
   // z16 does not support for hardware saturation.
   // So, force its usage to compiler generated sticks.
   if (nnpaEnableSaturation && isLessEqualNNPALevel(NNPALevel::M14))
-    nnpaEnableCompilerStickUnstick = true;
+    nnpaDisableCompilerStickUnstick = false;
 
   // Configure ONNXToZHighLoweringPass.
   bool isDynQuant = !nnpaQuantDynamic.empty();
@@ -272,7 +272,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
         pm.addPass(zlow::createZLowRewritePass());
         // Late generation of code for stick/unstick, needed to be after a
         // ZLowRewrite pass.
-        if (nnpaEnableCompilerStickUnstick)
+        if (!nnpaDisableCompilerStickUnstick)
           pm.addPass(zlow::createZLowStickExpansionPass(enableParallel));
         pm.addPass(mlir::createCanonicalizerPass());
         // Normalize MemRefs.
@@ -284,7 +284,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
         pm.addPass(zlow::createZLowRewritePass());
         // The createZLowStickExpansion pass may create parallel constructs,
         // they need to be handled here.
-        if (nnpaEnableCompilerStickUnstick && enableParallel)
+        if (!nnpaDisableCompilerStickUnstick && enableParallel)
           pm.addPass(mlir::createConvertSCFToOpenMPPass());
 
         pm.addPass(mlir::createCanonicalizerPass());
diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
@@ -561,7 +561,7 @@ struct ZHighToZLowStickOpLowering : public ConversionPattern {
     Value alloc = insertAllocForZMemRef(
         zMemRefType, shapeHelper.getOutputDims(), op, rewriter);
     if (isNHWCLayout(layout)) {
-      if (nnpaEnableCompilerStickUnstick) {
+      if (!nnpaDisableCompilerStickUnstick) {
         // Compiler-generated stick hasn't supported NCHW yet.
         // Explicitly transpose NCHW to NHWC.
         input = create.onnx.toMemref(
@@ -818,7 +818,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {
     // Allocate a buffer for the result MemRef.
     Value alloc = nullptr;
     if (isNHWCLayout(layout)) {
-      if (nnpaEnableCompilerStickUnstick) {
+      if (!nnpaDisableCompilerStickUnstick) {
         // Compiler-generated unstick hasn't supported NCHW yet.
         // This code allocates a NHWC buffer. It gets dims from the NCHW input.
         SmallVector<IndexExpr> dimList;
@@ -845,7 +845,7 @@ struct ZHighToZLowUnstickOpLowering : public ConversionPattern {
 
     // Emit a ZLow operation.
     rewriter.create<ZLowUnstickOp>(loc, input, alloc, layout);
-    if (isNHWCLayout(layout) && nnpaEnableCompilerStickUnstick)
+    if (isNHWCLayout(layout) && !nnpaDisableCompilerStickUnstick)
       // Compiler-generated unstick hasn't supported NCHW yet.
       // Explicitly transpose NHWC to NCHW.
       alloc =
diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/compiler-stick-unstick.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
 
 func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
   %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/stick-unstick.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
 
 func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {
   %0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>
diff --git a/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir b/test/mlir/accelerators/nnpa/conversion/zhigh-to-zlow/test-datalayout.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
+// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file | FileCheck %s
 
 func.func @should_lower_to_zlow_1d(%arg0: tensor<7xf32>) -> tensor<*xf16> {
   %0 = "zhigh.Stick"(%arg0) {layout = "1D"} : (tensor<7xf32>) -> tensor<*xf16>
diff --git a/test/mlir/accelerators/nnpa/driver/ccfd.mlir b/test/mlir/accelerators/nnpa/driver/ccfd.mlir
@@ -1,4 +1,4 @@
-// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
+// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
 
 // COM: This test is to check regression on the IBM CCFD model.
 // COM: We expect that there are only one zlow.stick for the input and one zlow.unstick for the output.
diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
+// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s
 
 // -----
 
diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
+// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s | FileCheck %s
 
 // -----
 
diff --git a/test/mlir/accelerators/nnpa/driver/saturation.mlir b/test/mlir/accelerators/nnpa/driver/saturation.mlir
@@ -4,8 +4,8 @@
 // RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitZLowIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=ZLOW_ON %s
 // RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=false --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_OFF %s
 // RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --nnpa-saturation=true --shape-inference --convert-onnx-to-zhigh --zhigh-decompose-stick-unstick %s | FileCheck --check-prefix=DECOMPOSE_ON %s
-// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
-// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --enable-compiler-stick-unstick --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s
+// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=false --printIR %s | FileCheck --check-prefix=COMPILER_STICK_OFF %s
+// RUN: onnx-mlir --march=z16 --maccel=NNPA --EmitMLIR --nnpa-saturation=true --printIR %s | FileCheck --check-prefix=COMPILER_STICK_ON %s
 
 // COM: for each case, check saturation ON and OFF.
 
diff --git a/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir b/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s
+// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR %s | FileCheck %s
 
 // Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul.
 func.func @softmax_matmul(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=true --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file \| FileCheck %s`
	`1`	`+// RUN: onnx-mlir-opt --march=z16 --maccel=NNPA --shape-inference --convert-onnx-to-krnl --canonicalize %s -split-input-file \| FileCheck %s`
`2`	`2`
`3`	`3`	`func.func @should_lower_to_zlow(%arg0: tensor<1x3x5x7xf32>) -> tensor<*xf32> {`
`4`	`4`	`%0 = "zhigh.Stick"(%arg0) {layout = "NHWC"} : (tensor<1x3x5x7xf32>) -> tensor<*xf16>`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} \| FileCheck %s && rm -rf ${ccfd}`
	`1`	`+// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" ${ccfd} \| FileCheck %s && rm -rf ${ccfd}`
`2`	`2`
`3`	`3`	`// COM: This test is to check regression on the IBM CCFD model.`
`4`	`4`	`// COM: We expect that there are only one zlow.stick for the input and one zlow.unstick for the output.`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: onnx-mlir --march=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s \| FileCheck %s`
	`1`	`+// RUN: onnx-mlir --march=z16 --maccel=NNPA --disable-compiler-stick-unstick --EmitMLIR --printIR -tag="test" %s \| FileCheck %s`
`2`	`2`
`3`	`3`	`// -----`
`4`	`4`