lib/Dialect/TritonGPU/Transforms: 1 file changed, +4 -4 lines changed
Columns: original file line number | diff line number | diff line change
@@ -1177,10 +1177,10 @@ void LayoutRematerialization::backwardRematerialization(
1177 1177 // 32 (the width per bank of the shared memory load/store unit).
1178 1178 int64_t convertLayoutBytes = getByteCount (convertOp.getSrc (), 32 , 32 );
1179 1179
1180 - // We measure costs in standardised milli-SM-cycles. This gives:
1181 - // smem load/ store: 8 * byte count
1182 - // synchronisation: 1024 (assuming 4 warps per block)
1183 - int64_t convertLayoutCost = 16 * convertLayoutBytes + 1024 ;
1180 + // We measure costs in standardised milli-SM-cycles. The smem load
1181 + // and store each cost 8 * convertLayoutBytes, and then we double
1182 + // it to account for extra cost due to synchronisation.
1183 + int64_t convertLayoutCost = 32 * convertLayoutBytes;
1184 1184 int64_t rematerialisationCost = 0 ;
1185 1185
1186 1186 // Evaluate single-use status for every operation in slice
You can’t perform that action at this time.
0 commit comments