@@ -225,9 +225,18 @@ def Rock_AttentionOp
 Results<(outs Optional<TensorOf<[F32, F16, BF16]>>:$result)> {
 let summary = "Attention operation of transformer models";
 let description = [{
-  Performs the operation out = SOFTMAX(queries * keys) * values.
+  Performs the operation out = SOFTMAX(preSoftmaxBody(queries * keys, preSoftmaxElemWiseInputs)) * values.
 
-  This operation performs the attention mechanism of transformer models.
+  This operation performs the attention mechanism of transformer models. There is an optional
+  element-wise fusion just before the softmax, defined by `preSoftmaxBody` with inputs
+  `preSoftmaxElemWiseInputs`.
+
+  If none of the `transposed` attributes are set, then `queries` is [G] x seq_q x head_qk,
+  `keys` is [G] x head_qk x seq_k, `values` is [G] x seq_k x head_v and `out` is [G] x seq_q x head_v,
+  where G is the optional group dimension (which is assumed to be 1 if not set).
+
+  The transpose attributes allow the non-group dimensions of a matrix to be
+  transposed. For example, if `qTransposed` is set, then the argument `queries` should be
+  a [G] x head_qk x seq_q memory.
 
 Those creating a `rock.attention` must specify the GPU architecture being targeted
 and the number of compute units (numCu) available. The parameters
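The semantics described in this hunk can be sketched in NumPy. This is a minimal reference model based only on the description above, not the actual rocMLIR lowering; `pre_softmax_body` and `extra_inputs` are hypothetical stand-ins for the op's `preSoftmaxBody` region and `preSoftmaxElemWiseInputs`:

```python
import numpy as np

def softmax(x, axis=-1):
    # Numerically stable softmax along the given axis.
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attention(queries, keys, values, pre_softmax_body=None, extra_inputs=()):
    # queries: G x seq_q x head_qk, keys: G x head_qk x seq_k,
    # values:  G x seq_k x head_v  (G is the optional group dim, 1 if absent).
    scores = queries @ keys                      # G x seq_q x seq_k
    if pre_softmax_body is not None:
        # Optional element-wise fusion just before the softmax.
        scores = pre_softmax_body(scores, *extra_inputs)
    return softmax(scores, axis=-1) @ values     # G x seq_q x head_v

G, seq_q, seq_k, head_qk, head_v = 2, 4, 5, 8, 3
q = np.random.rand(G, seq_q, head_qk)
k = np.random.rand(G, head_qk, seq_k)
v = np.random.rand(G, seq_k, head_v)
bias = np.random.rand(G, seq_q, seq_k)
# Example fusion: add a bias to the pre-softmax scores.
out = attention(q, k, v, pre_softmax_body=lambda s, b: s + b, extra_inputs=(bias,))
assert out.shape == (G, seq_q, head_v)
```

If `qTransposed` were set, `q` would instead be laid out as G x head_qk x seq_q and transposed before the first product.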
@@ -268,9 +277,18 @@ def Rock_GemmElementwiseGemmOp
 Results<(outs Optional<TensorOf<[F32]>>:$result)> {
 let summary = "GEMM-elementwise-GEMM operation";
 let description = [{
-  Performs the operation out = (a * b) * c.
+  Performs the operation out = preSecondGemmBody(a * b, elemwiseInputs) * c.
+
+  This operation performs a fused GEMM-elementwise-GEMM. There is an optional element-wise
+  fusion just before the second GEMM, defined by `preSecondGemmBody` with inputs `elemwiseInputs`.
 
-  This operation performs a fused GEMM-elementwise-GEMM.
+  If none of the `transposed` attributes are set, then `a` is [G] x M x K,
+  `b` is [G] x K x N, `c` is [G] x N x O and `out` is [G] x M x O, where G is the
+  optional group dimension (which is assumed to be 1 if not set).
+
+  The transpose attributes allow the non-group dimensions of a matrix to be
+  transposed. For example, if `aTransposed` is set, then the argument `a` should be
+  a [G] x K x M memory.
 
 Those creating a `rock.gemm_elementwise_gemm` must specify the GPU architecture being targeted
 and the number of compute units (numCu) available. The parameters
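The shape conventions and fusion point of this hunk can likewise be sketched in NumPy. Again a reference model derived from the description only; `pre_second_gemm_body` is a hypothetical stand-in for the op's `preSecondGemmBody` region:

```python
import numpy as np

def gemm_elementwise_gemm(a, b, c, pre_second_gemm_body=None, extra_inputs=()):
    # a: G x M x K, b: G x K x N, c: G x N x O (G is the optional group dim).
    acc = a @ b                                  # G x M x N
    if pre_second_gemm_body is not None:
        # Optional element-wise fusion just before the second GEMM.
        acc = pre_second_gemm_body(acc, *extra_inputs)
    return acc @ c                               # G x M x O

G, M, K, N, O = 2, 4, 8, 5, 3
a = np.random.rand(G, M, K)
b = np.random.rand(G, K, N)
c = np.random.rand(G, N, O)
# Example fusion: a ReLU between the two GEMMs.
out = gemm_elementwise_gemm(a, b, c, pre_second_gemm_body=lambda x: np.maximum(x, 0.0))
assert out.shape == (G, M, O)
```

With non-negative inputs the ReLU is the identity here, so `out` matches the plain `(a @ b) @ c`; the hook only matters when the first GEMM can produce negative values.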