@@ -49,9 +49,6 @@ SmallVector<Value> swizzleIndices(ConversionPatternRewriter &rewriter,
 
   auto fastIdx = rawIndices[order[0]];
   auto secondIdx = rawIndices[order[1]];
-  // Original algorithm taken from getSwizzledSharedPtrs function
-  // (TritonGPUToLLVMBase.h)
-  //
   // phase = (secondIdx // perPhase) % maxPhase
   // swizzledGroup = ((fastIdx // vec) ^ phase) * vec
   // groupRemainder = fastIdx % vec
@@ -158,7 +155,7 @@ Value computeSwizzledOffset(ConversionPatternRewriter &rewriter, Location loc,
                             ArrayRef<int64_t> opTensorShape,
                             ArrayRef<Value> strides) {
   Value offset = i32_val(0);
-  // Compute unswizzled multi dim coordinates in shared memmory object
+  // Compute unswizzled multi dim coordinates in shared memory object
   SmallVector<Value> elemMultiDimIndices(3);
   elemMultiDimIndices[dim.batch] =
       add(bTileOffset, i32_val(i.bTile * shapePerCTABTile + i.b));
@@ -309,7 +306,7 @@ Value loadFMAOp(Value srcVal, Value llVal, BlockedEncodingAttr dLayout,
                 sizeNonKPerThread);
 
   // In swizzled memory case basePtr stores pointer to the beginning of shared
-  // memmory object.
+  // memory object.
   //
   // If memory is not swizzled, algorithm breaks element offset pointer into
   // constant and non-constant part. Non-constant (depends on thread id) part is
0 commit comments