|
19 | 19 | #include "flang/Optimizer/Builder/MutableBox.h" |
20 | 20 | #include "flang/Optimizer/Dialect/CUF/CUFOps.h" |
21 | 21 | #include "flang/Optimizer/HLFIR/HLFIROps.h" |
| 22 | +#include "flang/Runtime/entry-names.h" |
22 | 23 | #include "mlir/Dialect/Index/IR/IndexOps.h" |
23 | 24 | #include "mlir/Dialect/SCF/IR/SCF.h" |
24 | 25 | #include "mlir/Dialect/Vector/IR/VectorOps.h" |
@@ -382,6 +383,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{ |
382 | 383 | &CI::genClusterDimBlocks), |
383 | 384 | {}, |
384 | 385 | /*isElemental=*/false}, |
| 386 | + {"cudagetstreamdefaultarg", |
| 387 | + static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>( |
| 388 | + &CI::genCUDAGetDefaultStreamArg), |
| 389 | + {{{"devptr", asAddr}}}, |
| 390 | + /*isElemental=*/false}, |
| 391 | + {"cudasetstreamarray", |
| 392 | + static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>( |
| 393 | + &CI::genCUDASetDefaultStreamArray), |
| 394 | + {{{"devptr", asAddr}, {"stream", asValue}}}, |
| 395 | + /*isElemental=*/false}, |
385 | 396 | {"fence_proxy_async", |
386 | 397 | static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>( |
387 | 398 | &CI::genFenceProxyAsync), |
@@ -1103,6 +1114,46 @@ CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType, |
1103 | 1114 | return res; |
1104 | 1115 | } |
1105 | 1116 |
|
| 1117 | +// CUDASETSTREAMARRAY |
| 1118 | +fir::ExtendedValue CUDAIntrinsicLibrary::genCUDASetDefaultStreamArray( |
| 1119 | + mlir::Type resTy, llvm::ArrayRef<fir::ExtendedValue> args) { |
| 1120 | + assert(args.size() == 2); |
| 1121 | + mlir::Value arg = fir::getBase(args[0]); |
| 1122 | + mlir::Value stream = fir::getBase(args[1]); |
| 1123 | + |
| 1124 | + if (mlir::isa<fir::BaseBoxType>(arg.getType())) |
| 1125 | + arg = fir::BoxAddrOp::create(builder, loc, arg); |
| 1126 | + mlir::Type i64Ty = builder.getI64Type(); |
| 1127 | + mlir::Type i32Ty = builder.getI32Type(); |
| 1128 | + auto ctx = builder.getContext(); |
| 1129 | + mlir::Type voidPtrTy = |
| 1130 | + fir::LLVMPointerType::get(ctx, mlir::IntegerType::get(ctx, 8)); |
| 1131 | + mlir::FunctionType ftype = |
| 1132 | + mlir::FunctionType::get(ctx, {voidPtrTy, i64Ty}, {i32Ty}); |
| 1133 | + mlir::Value voidPtr = builder.createConvert(loc, voidPtrTy, arg); |
| 1134 | + auto funcOp = |
| 1135 | + builder.createFunction(loc, RTNAME_STRING(CUFSetAssociatedStream), ftype); |
| 1136 | + auto call = fir::CallOp::create(builder, loc, funcOp, {voidPtr, stream}); |
| 1137 | + return call.getResult(0); |
| 1138 | +} |
| 1139 | + |
| 1140 | +// CUDAGETDEFAULTSTREAMARG |
| 1141 | +fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAGetDefaultStreamArg( |
| 1142 | + mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { |
| 1143 | + assert(args.size() == 1); |
| 1144 | + mlir::Value devptr = fir::getBase(args[0]); |
| 1145 | + mlir::Type i64Ty = builder.getI64Type(); |
| 1146 | + auto ctx = builder.getContext(); |
| 1147 | + mlir::Type voidPtrTy = |
| 1148 | + fir::LLVMPointerType::get(ctx, mlir::IntegerType::get(ctx, 8)); |
| 1149 | + mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {voidPtrTy}, {i64Ty}); |
| 1150 | + mlir::Value voidPtr = builder.createConvert(loc, voidPtrTy, devptr); |
| 1151 | + auto funcOp = |
| 1152 | + builder.createFunction(loc, RTNAME_STRING(CUFGetAssociatedStream), ftype); |
| 1153 | + auto call = fir::CallOp::create(builder, loc, funcOp, {voidPtr}); |
| 1154 | + return call.getResult(0); |
| 1155 | +} |
| 1156 | + |
1106 | 1157 | // FENCE_PROXY_ASYNC |
1107 | 1158 | void CUDAIntrinsicLibrary::genFenceProxyAsync( |
1108 | 1159 | llvm::ArrayRef<fir::ExtendedValue> args) { |
|
0 commit comments