@@ -639,9 +639,10 @@ struct AsyncCopyGlobalToLocalOpConversion
639639 (vecTy.getNumElements () * vecTy.getElementTypeBitWidth ()) / 8 ;
640640 assert (llvm::isPowerOf2_32 (vecBytes));
641641 Value vecBytesVal = b.i32_val (vecBytes);
642- int32_t cacheModifiers =
643- mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
644- op.getCache (), /* isLoad=*/ true , targetInfo);
642+
643+ Value cacheModifiers =
644+ b.i32_val (mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
645+ op.getCache (), /* isLoad=*/ true , targetInfo));
645646
646647 Value llMask = adaptor.getMask ();
647648 SmallVector<Value> maskElems;
@@ -679,7 +680,7 @@ struct AsyncCopyGlobalToLocalOpConversion
679680 auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
680681 loc,
681682 /* globalPtr=*/ srcPtr, /* ldsPtr=*/ coalescedShmemAddr[i],
682- /* size=*/ vecBytes , /* offset=*/ 0 ,
683+ /* size=*/ vecBytesVal , /* offset=*/ b. i32_val ( 0 ) ,
683684 /* aux=*/ cacheModifiers, /* alias_scopes=*/ nullptr ,
684685 /* noalias_scopes=*/ nullptr , /* tbaa=*/ nullptr );
685686 LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
@@ -694,8 +695,8 @@ struct AsyncCopyGlobalToLocalOpConversion
694695 rewriter.create <LLVM::CondBrOp>(loc, pred, loadBlock, afterLoad);
695696 rewriter.setInsertionPointToStart (loadBlock);
696697 auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
697- loc, srcPtr, coalescedShmemAddr[i], vecBytes ,
698- /* offset=*/ 0 , cacheModifiers, nullptr , nullptr , nullptr );
698+ loc, srcPtr, coalescedShmemAddr[i], vecBytesVal ,
699+ /* offset=*/ b. i32_val ( 0 ) , cacheModifiers, nullptr , nullptr , nullptr );
699700 LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
700701
701702 rewriter.create <LLVM::BrOp>(loc, afterLoad);
0 commit comments