Skip to content

Commit dcd5e16

Browse files
Merge commit '553d01d52ed891aa5ee6999d70309e30f8c5e9ea'
2 parents 90e9aec + 553d01d commit dcd5e16

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1177,10 +1177,10 @@ void LayoutRematerialization::backwardRematerialization(
1177 1177
// 32 (the width per bank of the shared memory load/store unit).
1178 1178
int64_t convertLayoutBytes = getByteCount(convertOp.getSrc(), 32, 32);
1179 1179

1180-
// We measure costs in standardised milli-SM-cycles. This gives:
1181-
// smem load/store: 8 * byte count
1182-
// synchronisation: 1024 (assuming 4 warps per block)
1183-
int64_t convertLayoutCost = 16 * convertLayoutBytes + 1024;
1180+
// We measure costs in standardised milli-SM-cycles. The smem load
1181+
// and store each cost 8 * convertLayoutBytes, and then we double
1182+
// it to account for extra cost due to synchronisation.
1183+
int64_t convertLayoutCost = 32 * convertLayoutBytes;
1184 1184
int64_t rematerialisationCost = 0;
1185 1185

1186 1186
// Evaluate single-use status for every operation in slice

0 commit comments

Comments
 (0)