@@ -1648,6 +1648,15 @@ struct LoadOpConversion
1648
1648
usePackedType = true ;
1649
1649
}
1650
1650
1651
+ if (isTransposeRequired) {
1652
+ if (!usePackedType) {
1653
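+ // Transposed load of an operand that is not packed yet: use the d32 transpose 2D load, i.e. load i32 values that each hold 32 / elemSizeInBits original elements.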
1654
+ loadResultElemType = i32_ty;
1655
+ packedElemsPerLanePerDPASInst = 32 / elemSizeInBits;
1656
+ usePackedType = true ;
1657
+ }
1658
+ }
1659
+
1651
1660
Type packedDPASOperandType =
1652
1661
LLVM::getVectorType (loadResultElemType, packedElemsPerLanePerDPASInst);
1653
1662
@@ -2105,6 +2114,10 @@ struct LoadOpConversion
2105
2114
rewriter.eraseOp (load2dOp);
2106
2115
return failure ();
2107
2116
}
2117
+ #if 0
2118
+ targetInfo.printf(rewriter, "base: %p, baseWidth: %d, baseHeight:%d, pitch:%d, offset_x:%d, offset_y:%d, loadVal: %d",
2119
+ {base, base_width, baseHeight, base_pitch, offsetX, offsetY, load2dOp.getResult()});
2120
+ #endif
2108
2121
LLVM_DEBUG (llvm::dbgs () << " Generated load op: " << load2dOp << " \n " );
2109
2122
2110
2123
unsigned packedRowNum = opIdx == DpasEncodingAttr::OpIdx::OperandA
@@ -2166,11 +2179,14 @@ struct LoadOpConversion
2166
2179
vblk * packedColNumPerVBlock + col)
2167
2180
<< " , " << std::to_string (k + row) << " \n " ;
2168
2181
});
2182
+ auto ret = b.bitcast (loadVal, unpackedDPASOperandType);
2183
+ #if 0
2184
+ targetInfo.printf(rewriter, "loadVal: %d", {ret});
2185
+ #endif
2169
2186
loadVals[{outer * packedColNum * numLoadPerOutRepCluster +
2170
2187
rep * packedColNum +
2171
2188
vblk * packedColNumPerVBlock + col,
2172
- k + row}] =
2173
- b.bitcast (loadVal, unpackedDPASOperandType);
2189
+ k + row}] = ret;
2174
2190
} break ;
2175
2191
case DpasEncodingAttr::OpIdx::OperandC: {
2176
2192
llvm_unreachable (" unexpected OpIdx::OperandC" );
0 commit comments