@@ -360,8 +360,10 @@ class InsertGPUAllocsPass final
360
360
auto newAlloc = builder.create <mlir::memref::AllocOp>(
361
361
loc, alloc.getType (), alloc.getDynamicSizes (),
362
362
alloc.getSymbolOperands ());
363
- builder.create <mlir::memref::CopyOp>(loc, allocResult,
364
- newAlloc.getResult ());
363
+ builder.create <mlir::gpu::MemcpyOp>(
364
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
365
+ /* asyncDependencies*/ std::nullopt, newAlloc.getResult (),
366
+ allocResult);
365
367
use.set (newAlloc.getResult ());
366
368
}
367
369
}
@@ -401,8 +403,9 @@ class InsertGPUAllocsPass final
401
403
/* symbolOperands*/ std::nullopt, hostShared);
402
404
auto allocResult = gpuAlloc.getResult (0 );
403
405
if (access.hostWrite && access.deviceRead ) {
404
- auto copy =
405
- builder.create <mlir::memref::CopyOp>(loc, op, allocResult);
406
+ auto copy = builder.create <mlir::gpu::MemcpyOp>(
407
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
408
+ /* asyncDependencies*/ std::nullopt, allocResult, op);
406
409
filter.insert (copy);
407
410
}
408
411
@@ -421,7 +424,9 @@ class InsertGPUAllocsPass final
421
424
op.replaceAllUsesExcept (allocResult, filter);
422
425
builder.setInsertionPoint (term);
423
426
if (access.hostRead && access.deviceWrite ) {
424
- builder.create <mlir::memref::CopyOp>(loc, allocResult, op);
427
+ builder.create <mlir::gpu::MemcpyOp>(
428
+ loc, /* asyncToken*/ static_cast <mlir::Type>(nullptr ),
429
+ /* asyncDependencies*/ std::nullopt, op, allocResult);
425
430
}
426
431
builder.create <mlir::gpu::DeallocOp>(loc, std::nullopt, allocResult);
427
432
}
0 commit comments