 #include "iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h"
 
 #include "iree/compiler/Codegen/Common/GPU/GPUHeuristics.h"
+#include "iree/compiler/Codegen/Common/TensorDynamicDimAnalysis.h"
 #include "iree/compiler/Codegen/Common/TileInferenceUtils.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "iree/compiler/Dialect/LinalgExt/Utils/MatchUtils.h"
 #include "iree/compiler/Dialect/LinalgExt/Utils/Utils.h"
+#include "iree/compiler/Dialect/Util/IR/UtilOps.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DebugLog.h"
 #include "llvm/Support/InterleavedRange.h"
+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/IR/Attributes.h"
@@ -45,6 +52,14 @@ namespace mlir::iree_compiler::IREE::GPU {
 constexpr int64_t kCacheLineSizeBits = 128 * 8;
 constexpr int64_t kPreferredCopyNumBits = 128;
 
+// Sentinel value used by IntegerRangeAnalysis when bounds are unknown.
+static constexpr uint64_t MAX_DIM_VALUE = (static_cast<uint64_t>(1) << 53) - 1;
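+// (2^53 - 1 = 9007199254740991; every non-negative integer up to this value is
+// exactly representable in a double.)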
+
+// Fallback bound when IntegerRangeAnalysis cannot determine the actual value.
+// Kept small (2^14) to avoid int64_t overflow when dimensions are multiplied
+// together in heuristic calculations.
+static constexpr uint64_t MAX_BOUND_VALUE = static_cast<uint64_t>(1) << 14;
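+// For example, a product of four such fallback bounds is 16384^4 = 2^56, which
+// still fits comfortably in an int64_t.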
+
 //===----------------------------------------------------------------------===//
 // Lowering Config Selection
 //===----------------------------------------------------------------------===//
@@ -653,7 +668,8 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
     ArrayRef<int64_t> bounds, ArrayRef<AffineMap> maps,
     ArrayRef<Value> operands, IREE::GPU::TargetAttr target, bool useDirectLoad,
     bool isGemm, bool scaled, int64_t splitReductionTripCnt,
-    bool cPromoteIfPadding, bool hasExistingAccumulator = false,
+    bool cPromoteIfPadding, bool boundsUsingAnalysis,
+    bool hasExistingAccumulator = false,
     std::optional<ConvToIgemmInfo> convToIgemmInfo = std::nullopt) {
   if (target.getWgp().getMma().empty()) {
     return failure();
@@ -969,7 +985,7 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
                           : ArrayRef<Attribute>{};
   GPU::appendPromotedOperandsList(context, attrs, promotionList,
                                   promotionTypes);
-  if (!mustBeAligned || couldNeedPadding) {
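+  // Bounds refined via range analysis are only upper bounds on the dynamic
+  // sizes, so padding must still be applied in case the runtime extents are
+  // smaller.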
+  if (!mustBeAligned || couldNeedPadding || boundsUsingAnalysis) {
     SmallVector<int64_t> paddingTileSizes = workgroupTileSizes;
 
     // Initialize inner and outer padding sizes from reductionTileSizes.
@@ -1085,7 +1101,8 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
           igemmLoopBounds, igemmContractionMaps, igemmOperands, target,
           useDirectLoad, /*isGemm=*/false,
           /*scaled=*/false, splitReductionTripCnt,
-          /*cPromoteIfPadding=*/cPromoteIfPadding, hasExistingAccumulator,
+          /*cPromoteIfPadding=*/cPromoteIfPadding,
+          /*boundsUsingAnalysis=*/false, hasExistingAccumulator,
           convToIgemmInfo);
   if (failed(configAndWgSize)) {
     return failure();
@@ -1112,6 +1129,68 @@ LogicalResult setIGEMMConvolutionLoweringConfig(
       workgroupSize, targetSubgroupSize, pipelineConfig);
 }
 
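+// Returns the loop bounds of `linalgOp`, refining any dynamic dimensions with
+// upper bounds derived from range analysis on the enclosing function. Dynamic
+// dimensions that cannot be bounded fall back to MAX_BOUND_VALUE.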
+static FailureOr<SmallVector<int64_t>>
+getLoopBoundsWithRangeAnalysis(linalg::LinalgOp linalgOp,
+                               mlir::FunctionOpInterface entryPoint) {
+  // Use TensorDynamicDimAnalysis for cleaner range queries.
+  TensorDynamicDimAnalysis dynamicDimAnalysis(entryPoint);
+  if (failed(dynamicDimAnalysis.run())) {
+    return linalgOp.getStaticLoopRanges();
+  }
+
+  SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
+  SmallVector<AffineMap> indexingMaps = linalgOp.getIndexingMapsArray();
+
+  for (auto [loopIdx, bound] : llvm::enumerate(bounds)) {
+    if (!ShapedType::isDynamic(bound)) {
+      continue;
+    }
+
+    bool boundRefined = false;
+
+    // Find an operand and dimension that correspond to this loop.
+    for (auto [operandIdx, operand] :
+         llvm::enumerate(linalgOp->getOperands())) {
+      auto shapedType = dyn_cast<ShapedType>(operand.getType());
+      if (!shapedType) {
+        continue;
+      }
+
+      AffineMap map = indexingMaps[operandIdx];
+      for (auto [dimIdx, expr] : llvm::enumerate(map.getResults())) {
+        auto dimExpr = dyn_cast<AffineDimExpr>(expr);
+        if (!dimExpr || dimExpr.getPosition() != loopIdx) {
+          continue;
+        }
+        if (!ShapedType::isDynamic(shapedType.getDimSize(dimIdx))) {
+          continue;
+        }
+
+        // Use TensorDynamicDimAnalysis to get range info directly.
+        if (auto range = dynamicDimAnalysis.getRangeInfo(operand, dimIdx)) {
+          int64_t ub = range->smax().getSExtValue();
+          if (ub > 0 && ub < MAX_DIM_VALUE) {
+            bounds[loopIdx] = ub;
+            boundRefined = true;
+            break;
+          }
+        }
+      }
+
+      if (boundRefined) {
+        break;
+      }
+    }
+
+    // If we couldn't refine the bound, fall back to MAX_BOUND_VALUE.
+    if (!boundRefined && ShapedType::isDynamic(bounds[loopIdx])) {
+      bounds[loopIdx] = MAX_BOUND_VALUE;
+    }
+  }
+
+  return bounds;
+}
+
 LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
                                       mlir::FunctionOpInterface entryPoint,
                                       Operation *op, bool useDirectLoad) {
@@ -1122,7 +1201,15 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
     return failure();
   }
 
+  // Use range analysis (via TensorDynamicDimAnalysis) to get better bounds for
+  // dynamic shapes.
+  bool boundsUsingAnalysis = false;
+  FailureOr<SmallVector<int64_t>> maybeBounds =
+      getLoopBoundsWithRangeAnalysis(linalgOp, entryPoint);
   SmallVector<int64_t> bounds = linalgOp.getStaticLoopRanges();
+  if (succeeded(maybeBounds) && (maybeBounds != bounds)) {
+    boundsUsingAnalysis = true;
+    bounds = std::move(*maybeBounds);
+  }
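+  // From here on `bounds` may hold analysis-derived upper bounds rather than
+  // exact static extents; `boundsUsingAnalysis` records this so the lowering
+  // config helper applies padding.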
   SmallVector<AffineMap> maps = linalgOp.getIndexingMapsArray();
   SmallVector<Value> operands(linalgOp->getOperands());
 
@@ -1144,7 +1231,7 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
       getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
           bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
           /*scaled=*/false, splitReductionTripCnt, cPromoteIfPadding,
-          hasExistingAccumulator);
+          boundsUsingAnalysis, hasExistingAccumulator);
 
   // TODO(muzasyed): add generalization for scaled and nonscaled versions of
   // matmul lowering.
@@ -1155,7 +1242,7 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
     configAndWgSize = getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
         bounds, maps, operands, target, useDirectLoad, /*isGemm=*/true,
         /*scaled=*/true, splitReductionTripCnt, cPromoteIfPadding,
-        hasExistingAccumulator);
+        boundsUsingAnalysis, hasExistingAccumulator);
   }
 
   if (failed(configAndWgSize)) {