@@ -639,10 +639,9 @@ struct AsyncCopyGlobalToLocalOpConversion
639639 (vecTy.getNumElements () * vecTy.getElementTypeBitWidth ()) / 8 ;
640640 assert (llvm::isPowerOf2_32 (vecBytes));
641641 Value vecBytesVal = b.i32_val (vecBytes);
642-
643- Value cacheModifiers =
644- b.i32_val (mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
645- op.getCache (), /* isLoad=*/ true , targetInfo));
642+ int32_t cacheModifiers =
643+ mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
644+ op.getCache (), /* isLoad=*/ true , targetInfo);
646645
647646 Value llMask = adaptor.getMask ();
648647 SmallVector<Value> maskElems;
@@ -680,7 +679,7 @@ struct AsyncCopyGlobalToLocalOpConversion
680679 auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
681680 loc,
682681 /* globalPtr=*/ srcPtr, /* ldsPtr=*/ coalescedShmemAddr[i],
683- /* size=*/ vecBytesVal , /* offset=*/ b. i32_val ( 0 ) ,
682+ /* size=*/ vecBytes , /* offset=*/ 0 ,
684683 /* aux=*/ cacheModifiers, /* alias_scopes=*/ nullptr ,
685684 /* noalias_scopes=*/ nullptr , /* tbaa=*/ nullptr );
686685 LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
@@ -695,8 +694,8 @@ struct AsyncCopyGlobalToLocalOpConversion
695694 rewriter.create <LLVM::CondBrOp>(loc, pred, loadBlock, afterLoad);
696695 rewriter.setInsertionPointToStart (loadBlock);
697696 auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
698- loc, srcPtr, coalescedShmemAddr[i], vecBytesVal ,
699- /* offset=*/ b. i32_val ( 0 ) , cacheModifiers, nullptr , nullptr , nullptr );
697+ loc, srcPtr, coalescedShmemAddr[i], vecBytes ,
698+ /* offset=*/ 0 , cacheModifiers, nullptr , nullptr , nullptr );
700699 LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
701700
702701 rewriter.create <LLVM::BrOp>(loc, afterLoad);
0 commit comments