1212
1313#include " mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
1414
15+ #include " mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
16+ #include " mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
1517#include " mlir/Analysis/SliceAnalysis.h"
1618#include " mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
1719#include " mlir/Dialect/Affine/Analysis/AffineStructures.h"
1820#include " mlir/Dialect/Affine/Analysis/NestedMatcher.h"
1921#include " mlir/Dialect/Affine/IR/AffineOps.h"
2022#include " mlir/Dialect/Affine/IR/AffineValueMap.h"
21- #include " mlir/Dialect/GPU/IR/GPUDialect .h"
23+ #include " mlir/Interfaces/FunctionInterfaces .h"
2224#include " llvm/Support/MathExtras.h"
2325
2426#include " llvm/ADT/DenseSet.h"
3133
3234using namespace mlir ;
3335using namespace mlir ::affine;
36+ using namespace mlir ::dataflow;
3437
3538#define DEBUG_TYPE " affine-loop-analysis"
3639
@@ -85,48 +88,54 @@ void mlir::affine::getTripCountMapAndOperands(
8588 tripCountValueMap.getOperands ().end ());
8689}
8790
88- // / Replace thread_id with its maximum value, if `replaceWithZero` is true,
89- // / thread_id will be replaced by its minimum value 0.
90- static void replaceGPUOperands (AffineForOp forOp,
91- SmallVectorImpl<Value> &operands,
92- SmallVectorImpl<AffineExpr> &symReplacements,
93- unsigned numDim, bool replaceWithZero = false ) {
94- auto launchOp = forOp->getParentOfType <gpu::LaunchOp>();
95- if (!launchOp)
91+ // / By running `IntegerRangeAnalysis` to get the ranges of operand, then fill
92+ // / the `symReplacements` with range. If `replaceByMin` is set to true,
93+ // / construct `replacement` using the smallest value.By default, the largest
94+ // / value will be used for constructing `replacement`.
95+ static void replaceOperandByRange (AffineForOp forOp,
96+ SmallVectorImpl<Value> &operands,
97+ SmallVectorImpl<AffineExpr> &symReplacements,
98+ unsigned numDim, bool replaceByMin = false ) {
99+ DataFlowSolver solver;
100+ solver.load <DeadCodeAnalysis>();
101+ solver.load <IntegerRangeAnalysis>();
102+ if (failed (solver.initializeAndRun (
103+ forOp->getParentOfType <FunctionOpInterface>())))
96104 return ;
97105
98- // `b` is only used to create `AffineExpr`.
106+ // `b` is used to create affineExpr
99107 Builder b (forOp.getContext ());
100- unsigned idx = 0 ;
101-
102108 for (unsigned i = numDim, e = operands.size (); i < e; ++i) {
103109 Value operand = operands[i];
104- if (Value blockSize = launchOp.getBlockSizeOnAxis (operand)) {
105- operands[i] = blockSize;
106- if (!replaceWithZero)
107- symReplacements.push_back (b.getAffineSymbolExpr (idx++) - 1 );
108- else
109- symReplacements.push_back (b.getAffineConstantExpr (0 ));
110+ auto lattice =
111+ solver.lookupState <dataflow::IntegerValueRangeLattice>(operand);
112+ if (!lattice) {
113+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
110114 continue ;
111115 }
112116
113- Operation *defOp = operand.getDefiningOp ();
114- if (!defOp) {
115- ++idx;
117+ if (lattice->getValue ().isUninitialized ()) {
118+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
116119 continue ;
117120 }
118121
119- if (auto threadIdOp = mlir::dyn_cast<gpu::ThreadIdOp>(defOp)) {
120- gpu::Dimension dimension = threadIdOp.getDimension ();
121- operands[i] = launchOp.getBlockSizeOnAxis (dimension);
122- if (!replaceWithZero)
123- symReplacements.push_back (b.getAffineSymbolExpr (idx++) - 1 );
124- else
125- symReplacements.push_back (b.getAffineConstantExpr (0 ));
122+ ConstantIntRanges range = lattice->getValue ().getValue ();
123+ APInt max = range.smax ();
124+ APInt min = range.smin ();
125+ unsigned bitNums = max.getBitWidth ();
126+
127+ if (APInt::getSignedMaxValue (bitNums) == max &&
128+ APInt::getSignedMinValue (bitNums) == min) {
129+ symReplacements.push_back (b.getAffineSymbolExpr (i - numDim));
126130 continue ;
127131 }
128- ++idx;
132+
133+ if (!replaceByMin)
134+ symReplacements.push_back (b.getAffineConstantExpr (max.getZExtValue ()));
135+ else
136+ symReplacements.push_back (b.getAffineConstantExpr (min.getZExtValue ()));
129137 }
138+ return ;
130139}
131140
132141// / Take the min if all trip counts are constant.
@@ -158,32 +167,28 @@ std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
158167 if (!map)
159168 return std::nullopt ;
160169 SmallVector<AffineExpr, 4 > symReplacements;
161- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims ());
170+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims ());
162171 map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
163172 map.getNumSymbols ());
164- affine::AffineValueMap valueMap (map, operands);
165- (void )valueMap.canonicalize ();
166- map = valueMap.getAffineMap ();
167173 return getConstantTripCountFromAffineMap (map);
168174}
169175
170- // / In some scenarios, such as GPU, the number of trip of each thread in the
171- // / loop is inconsistent. This function returns the maximum number of trip.
176+ // / Returns the maximum trip count when the operand of forOp has a range. If the
177+ // / operand of forOp is a constant, the return value is the same as
178+ // / `getConstantTripCount`.
172179std::optional<uint64_t >
173- mlir::affine::getMaxConstantTripCount (AffineForOp forOp) {
180+ mlir::affine::getUpperBoundOnTripCount (AffineForOp forOp) {
174181 SmallVector<Value, 4 > operands;
175182 AffineMap map;
176183 getTripCountMapAndOperands (forOp, &map, &operands);
177184
178185 if (!map)
179186 return std::nullopt ;
180187 SmallVector<AffineExpr, 4 > symReplacements;
181- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims (), true );
188+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims (),
189+ true );
182190 map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
183191 map.getNumSymbols ());
184- affine::AffineValueMap valueMap (map, operands);
185- (void )valueMap.canonicalize ();
186- map = valueMap.getAffineMap ();
187192 return getConstantTripCountFromAffineMap (map);
188193}
189194
@@ -198,12 +203,9 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
198203 if (!map)
199204 return 1 ;
200205 SmallVector<AffineExpr, 4 > symReplacements;
201- replaceGPUOperands (forOp, operands, symReplacements, map.getNumDims ());
206+ replaceOperandByRange (forOp, operands, symReplacements, map.getNumDims ());
202207 map = map.replaceDimsAndSymbols ({}, symReplacements, map.getNumDims (),
203208 map.getNumSymbols ());
204- affine::AffineValueMap valueMap (map, operands);
205- (void )valueMap.canonicalize ();
206- map = valueMap.getAffineMap ();
207209 // The largest divisor of the trip count is the GCD of the individual largest
208210 // divisors.
209211 assert (map.getNumResults () >= 1 && " expected one or more results" );
0 commit comments