@@ -143,7 +143,8 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
143143 llvm::SmallVector<mlir::Type> operandsTypes;
144144 for (auto arg : gpuLaunchFunc.getKernelOperands ())
145145 operandsTypes.push_back (arg.getType ());
146- auto fctTy = mlir::FunctionType::get (&context, operandsTypes, {});
146+ auto fctTy = mlir::FunctionType::get (&context, operandsTypes,
147+ gpuLaunchFunc.getResultTypes ());
147148 if (!hasPortableSignature (fctTy, op))
148149 convertCallOp (gpuLaunchFunc, fctTy);
149150 } else if (auto addr = mlir::dyn_cast<fir::AddrOfOp>(op)) {
@@ -520,10 +521,14 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
520521 llvm::SmallVector<mlir::Value, 1 > newCallResults;
521522 // TODO propagate/update call argument and result attributes.
522523 if constexpr (std::is_same_v<std::decay_t <A>, mlir::gpu::LaunchFuncOp>) {
524+ mlir::Value asyncToken = callOp.getAsyncToken ();
523525 auto newCall = A::create (*rewriter, loc, callOp.getKernel (),
524526 callOp.getGridSizeOperandValues (),
525527 callOp.getBlockSizeOperandValues (),
526- callOp.getDynamicSharedMemorySize (), newOpers);
528+ callOp.getDynamicSharedMemorySize (), newOpers,
529+ asyncToken ? asyncToken.getType () : nullptr ,
530+ callOp.getAsyncDependencies (),
531+ /* clusterSize=*/ std::nullopt );
527532 if (callOp.getClusterSizeX ())
528533 newCall.getClusterSizeXMutable ().assign (callOp.getClusterSizeX ());
529534 if (callOp.getClusterSizeY ())
0 commit comments