Skip to content

Commit 13419bb

Browse files
committed
Fix wrong value in supported bit width for global.to.lds
1 parent ea02c3c commit 13419bb

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

third_party/amd/lib/TritonAMDGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,7 @@ struct AsyncCopyGlobalToLocalOpConversion
424424
supportedWidths.insert(16);
425425
supportedWidths.insert(32);
426426
if (targetInfo.getGPUKind() == llvm::AMDGPU::GPUKind::GK_GFX950) {
427-
supportedWidths.insert(98);
427+
supportedWidths.insert(96);
428428
supportedWidths.insert(128);
429429
}
430430
break;
@@ -545,9 +545,9 @@ struct AsyncCopyGlobalToLocalOpConversion
545545
rewriter.create<LLVM::CondBrOp>(loc, maskElems[srcIdx], loadBlock,
546546
afterLoad);
547547
rewriter.setInsertionPointToStart(loadBlock);
548-
rewriter.create<ROCDL::GlobalLoadLDSOp>(loc, srcPtr, shmemAddrs[i],
549-
vecBytesVal, b.i32_val(0),
550-
cacheModifiers);
548+
rewriter.create<ROCDL::GlobalLoadLDSOp>(
549+
loc, srcPtr, shmemAddrs[i], vecBytesVal, /*offset=*/b.i32_val(0),
550+
cacheModifiers);
551551

552552
rewriter.create<LLVM::BrOp>(loc, afterLoad);
553553
rewriter.setInsertionPointToStart(afterLoad);

0 commit comments

Comments
 (0)