Commit 4227a87

1 parent 0649133

18 files changed (+298, -141 lines)

WORKSPACE.bazel

Lines changed: 2 additions & 2 deletions
@@ -17,9 +17,9 @@ workspace(name = "stablehlo")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

-LLVM_COMMIT = "113f01aa82d055410f22a9d03b3468fa68600589"
+LLVM_COMMIT = "3a6b818132e3133c7d33f8f577e62503f12869b4"

-LLVM_SHA256 = "9aee00a35aa76639746589c6d09e8c18249be16b5b6aa6b788a570a4bc6c4543"
+LLVM_SHA256 = "a0b3de698393e0f49d0aca3f869cc03bf0c59eba0c65f608e565278943c31958"

http_archive(
    name = "llvm-raw",

build_tools/llvm_version.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-113f01aa82d055410f22a9d03b3468fa68600589
+3a6b818132e3133c7d33f8f577e62503f12869b4

docs/generated/stablehlo_linalg_passes.md

Lines changed: 3 additions & 2 deletions
@@ -7,6 +7,7 @@ _Legalize StableHLO to LinAlg_
#### Options

```
-  -enable-primitive-ops : Lower to primitive Linalg ops (map, reduce and transpose) when possible, instead of linalg.generic
-  -enable-sparse-ops    : Lower to Sparse Tensor ops (sparse_tensor.concatenate)when possible, instead of linalg.generic
+  -enable-primitive-ops  : Lower to primitive Linalg ops (map, reduce and transpose) when possible, instead of linalg.generic
+  -enable-sparse-ops     : Lower to Sparse Tensor ops (sparse_tensor.concatenate)when possible, instead of linalg.generic
+  -capture-scalar-inputs : Capture scalar inputs in generic ops instead ofpassing as tensor-scalar argument.
```

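The new `-capture-scalar-inputs` option controls whether a rank-0 operand (such as a select predicate) is threaded into the generated `linalg.generic` as a tensor operand or captured inside the region; the existing CHECK run keeps its expectations, so capturing appears to be the default. Below is a hand-written sketch of the `capture-scalar-inputs=false` shape, reconstructed from the CHECK-NO-CAPTURE expectations added to pointwise.mlir further down; the function name and SSA value names are illustrative, not compiler output:

```mlir
// Scalar predicate mapped with a rank-0 indexing map instead of being
// captured in the region (capture-scalar-inputs=false).
#scalar = affine_map<(d0, d1) -> ()>
#id = affine_map<(d0, d1) -> (d0, d1)>

func.func @select_no_capture(%pred: tensor<i1>, %lhs: tensor<2x?xf32>,
                             %rhs: tensor<2x?xf32>) -> tensor<2x?xf32> {
  %c1 = arith.constant 1 : index
  %dim = tensor.dim %lhs, %c1 : tensor<2x?xf32>
  %dst = tensor.empty(%dim) : tensor<2x?xf32>
  // The predicate tensor is a regular input of the generic op.
  %0 = linalg.generic
         {indexing_maps = [#scalar, #id, #id, #id],
          iterator_types = ["parallel", "parallel"]}
         ins(%pred, %lhs, %rhs : tensor<i1>, tensor<2x?xf32>, tensor<2x?xf32>)
         outs(%dst : tensor<2x?xf32>) {
  ^bb0(%p: i1, %l: f32, %r: f32, %out: f32):
    %sel = arith.select %p, %l, %r : f32
    linalg.yield %sel : f32
  } -> tensor<2x?xf32>
  func.return %0 : tensor<2x?xf32>
}
```

With the default capture behavior, the predicate element is instead materialized once outside the generic (the existing CHECK-PRIMITIVE lines refer to a `%PRED_ELEM` value) and used directly in the region, so only `%lhs` and `%rhs` are mapped inputs.
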
docs/generated/stablehlo_optimization_passes.md

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@ _Folds StableHLO operations_

```
  -assume-no-undeclared-side-effects : Allow dead code to be eliminated in some situations (e.g. dead while loops) under the assumption that ops are pure unless declared with explicit MLIR `MemoryEffects`. Notably, this means `func.call` ops will be assumed pure.
-  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `iota` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded.
+  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `concat` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded. Splat folds are exempt from this limit.
  -optimize-float : Allow float optimizations that, though mathematically equivalent, may result in slightly different quantization of floating-point values (e.g. `log(sqrt(x))` -> `0.5 * log(x)`). Float optimizations that can't affect numerical results are always enabled.
```

@@ -105,7 +105,7 @@ high coverage of the pass today.
#### Options

```
-  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `iota` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded.
+  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `concat` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded. Splat folds are exempt from this limit.
```

### `-stablehlo-target-independent-optimization`
@@ -123,6 +123,6 @@ Users should prefer this pass to calling the others directly.

```
  -assume-no-undeclared-side-effects : Allow dead code to be eliminated in some situations (e.g. dead while loops) under the assumption that ops are pure unless declared with explicit MLIR `MemoryEffects`. Notably, this means `func.call` ops will be assumed pure.
-  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `iota` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded.
+  -fold-op-element-limit : Folding an op into a constant can sometimes come at the cost of memory overhead. (This occurs if the op's inputs are reused, meaning that they can't be deleted after the op is folded to a constant, or when folding operations like `concat` whose outputs take up more memory than their inputs.) In such cases, this config option sets an upper limit on how many elements an op's result may have before the op is no longer folded. Splat folds are exempt from this limit.
  -optimize-float : Allow float optimizations that, though mathematically equivalent, may result in slightly different quantization of floating-point values (e.g. `log(sqrt(x))` -> `0.5 * log(x)`). Float optimizations that can't affect numerical results are always enabled.
```

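To make the `-fold-op-element-limit` wording concrete, here is a small hand-written example, not part of the commit (the function name is made up): folding the `concatenate` below would materialize an 8-element constant from two 4-element inputs, so a limit smaller than 8 would leave the op unfolded, whereas a splat result would be folded regardless, per the updated description.

```mlir
func.func @fold_candidate() -> tensor<8xf32> {
  %lhs = stablehlo.constant dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf32>
  %rhs = stablehlo.constant dense<[5.0, 6.0, 7.0, 8.0]> : tensor<4xf32>
  // Folding this op creates an 8-element dense constant, i.e. a result larger
  // than either input; -fold-op-element-limit caps how large such a folded
  // result may be before folding is skipped.
  %0 = "stablehlo.concatenate"(%lhs, %rhs) {dimension = 0 : i64}
      : (tensor<4xf32>, tensor<4xf32>) -> tensor<8xf32>
  func.return %0 : tensor<8xf32>
}
```
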
stablehlo/conversions/linalg/tests/pointwise.mlir

Lines changed: 31 additions & 0 deletions
@@ -1,5 +1,6 @@
// RUN: stablehlo-opt %s --stablehlo-legalize-to-linalg --split-input-file --canonicalize | FileCheck %s
// RUN: stablehlo-opt %s --stablehlo-legalize-to-linalg="enable-primitive-ops=true" --split-input-file --canonicalize | FileCheck %s --check-prefix=CHECK-PRIMITIVE
+// RUN: stablehlo-opt %s --stablehlo-legalize-to-linalg="capture-scalar-inputs=false" --split-input-file --canonicalize | FileCheck %s --check-prefix=CHECK-NO-CAPTURE

// CHECK: #map = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: func @float_add

@@ -538,6 +539,19 @@ func.func @complex_sign(

// -----

+// CHECK-LABEL: func @float_tan
+// CHECK-PRIMITIVE-LABEL: func @float_tan
+func.func @float_tan(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> {
+  // CHECK: linalg.generic
+  // CHECK: tan
+  // CHECK-PRIMITIVE: linalg.map
+  // CHECK-PRIMITIVE: tan
+  %0 = "stablehlo.tan"(%arg0) : (tensor<2x2xf32>) -> tensor<2x2xf32>
+  func.return %0 : tensor<2x2xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @float_tanh
// CHECK-PRIMITIVE-LABEL: func @float_tanh
func.func @float_tanh(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> {

@@ -927,6 +941,23 @@ func.func @select_scalar_pred_dyn(%pred : tensor<i1>, %lhs: tensor<2x?xf32>, %rh
// CHECK-PRIMITIVE: %[[RES:.*]] = arith.select %[[PRED_ELEM]], %[[LHS_]], %[[RHS_]] : f32
// CHECK-PRIMITIVE: linalg.yield %[[RES]]

+// CHECK-NO-CAPTURE: #[[SCALAR_MAP:.*]] = affine_map<(d0, d1) -> ()>
+// CHECK-NO-CAPTURE: #[[ID_MAP:.*]] = affine_map<(d0, d1) -> (d0, d1)>
+// CHECK-NO-CAPTURE: func @select_scalar_pred_dyn
+// CHECK-NO-CAPTURE-SAME: (%[[PRED:.*]]: tensor<i1>, %[[LHS:.*]]: tensor<2x?xf32>, %[[RHS:.*]]: tensor<2x?xf32>)
+// CHECK-NO-CAPTURE-DAG: %[[C1:.*]] = arith.constant 1
+// CHECK-NO-CAPTURE-DAG: %[[DIM:.*]] = tensor.dim %[[LHS]], %[[C1]]
+// CHECK-NO-CAPTURE-DAG: %[[DST:.*]] = tensor.empty(%[[DIM]])
+// CHECK-NO-CAPTURE: linalg.generic
+// CHECK-NO-CAPTURE-SAME: indexing_maps = [#[[SCALAR_MAP]], #[[ID_MAP]], #[[ID_MAP]], #[[ID_MAP]]]
+// CHECK-NO-CAPTURE-SAME: iterator_types = ["parallel", "parallel"]
+// CHECK-NO-CAPTURE-SAME: ins(%[[PRED]], %[[LHS]], %[[RHS]] : tensor<i1>, tensor<2x?xf32>, tensor<2x?xf32>)
+// CHECK-NO-CAPTURE-SAME: outs(%[[DST]] : tensor<2x?xf32>)
+// CHECK-NO-CAPTURE-SAME: {someattr}
+// CHECK-NO-CAPTURE: ^bb0(%[[PRED_:.*]]: i1, %[[LHS_:.*]]: f32, %[[RHS_:.*]]: f32, %{{.*}}: f32):
+// CHECK-NO-CAPTURE: %[[RES:.*]] = arith.select %[[PRED_]], %[[LHS_]], %[[RHS_]] : f32
+// CHECK-NO-CAPTURE: linalg.yield %[[RES]]
+
// -----

// CHECK: func @select_scalar_pred_static

stablehlo/conversions/linalg/transforms/LegalizeToLinalgUtils.cpp

Lines changed: 4 additions & 5 deletions
@@ -140,12 +140,11 @@ Value preSparsify(Operation* op, llvm::SmallVector<Value, 2>& values, Type rtp,
  // (any sign-op, or an integral abs-op).
  // TODO(peiming, ajcbik): these all can potentially be optimized by applying
  // value transform on sparse_tenosr.value memref
-  if (isa<mlir::stablehlo::SignOp>(op) || isa<mlir::stablehlo::NegOp>(op) ||
+  if (isa<mlir::stablehlo::SignOp, mlir::stablehlo::NegOp,
+          mlir::stablehlo::TanOp>(op) ||
      (isa<mlir::stablehlo::AbsOp>(op) && hasIntegralShapeType(op)) ||
-      isa<chlo::AsinOp>(op) || isa<chlo::AsinhOp>(op) ||
-      isa<chlo::AtanOp>(op) || isa<chlo::AtanhOp>(op) ||
-      isa<chlo::BesselI1eOp>(op) || isa<chlo::SinhOp>(op) ||
-      isa<chlo::TanOp>(op)) {
+      isa<chlo::AsinOp, chlo::AsinhOp, chlo::AtanOp, chlo::AtanhOp,
+          chlo::BesselI1eOp, chlo::SinhOp, chlo::TanOp>(op)) {
    if (!sparse_tensor::getSparseTensorEncoding(op->getResult(0).getType()) &&
        !sparse_tensor::getSparseTensorEncoding(op->getOperand(0).getType()))
      return Value();
