@@ -933,6 +933,7 @@ static constexpr IntrinsicHandler handlers[]{
933
933
/* isElemental=*/ false },
934
934
{" tand" , &I::genTand},
935
935
{" this_grid" , &I::genThisGrid, {}, /* isElemental=*/ false },
936
+ {" this_warp" , &I::genThisWarp, {}, /* isElemental=*/ false },
936
937
{" threadfence" , &I::genThreadFence, {}, /* isElemental=*/ false },
937
938
{" threadfence_block" , &I::genThreadFenceBlock, {}, /* isElemental=*/ false },
938
939
{" threadfence_system" , &I::genThreadFenceSystem, {}, /* isElemental=*/ false },
@@ -8194,6 +8195,45 @@ mlir::Value IntrinsicLibrary::genThisGrid(mlir::Type resultType,
8194
8195
return res;
8195
8196
}
8196
8197
8198
+ // THIS_WARP
8199
+ mlir::Value IntrinsicLibrary::genThisWarp (mlir::Type resultType,
8200
+ llvm::ArrayRef<mlir::Value> args) {
8201
+ assert (args.size () == 0 );
8202
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
8203
+ assert (recTy && " RecordType expepected" );
8204
+ mlir::Value res = builder.create <fir::AllocaOp>(loc, resultType);
8205
+ mlir::Type i32Ty = builder.getI32Type ();
8206
+
8207
+ // coalesced_group%size = 32
8208
+ mlir::Value size = builder.createIntegerConstant (loc, i32Ty, 32 );
8209
+ auto sizeFieldName = recTy.getTypeList ()[1 ].first ;
8210
+ mlir::Type sizeFieldTy = recTy.getTypeList ()[1 ].second ;
8211
+ mlir::Type fieldIndexType = fir::FieldType::get (resultType.getContext ());
8212
+ mlir::Value sizeFieldIndex = builder.create <fir::FieldIndexOp>(
8213
+ loc, fieldIndexType, sizeFieldName, recTy,
8214
+ /* typeParams=*/ mlir::ValueRange{});
8215
+ mlir::Value sizeCoord = builder.create <fir::CoordinateOp>(
8216
+ loc, builder.getRefType (sizeFieldTy), res, sizeFieldIndex);
8217
+ builder.create <fir::StoreOp>(loc, size, sizeCoord);
8218
+
8219
+ // coalesced_group%rank = threadIdx.x & 31 + 1
8220
+ mlir::Value threadIdX = builder.create <mlir::NVVM::ThreadIdXOp>(loc, i32Ty);
8221
+ mlir::Value mask = builder.createIntegerConstant (loc, i32Ty, 31 );
8222
+ mlir::Value one = builder.createIntegerConstant (loc, i32Ty, 1 );
8223
+ mlir::Value masked =
8224
+ builder.create <mlir::arith::AndIOp>(loc, threadIdX, mask);
8225
+ mlir::Value rank = builder.create <mlir::arith::AddIOp>(loc, masked, one);
8226
+ auto rankFieldName = recTy.getTypeList ()[2 ].first ;
8227
+ mlir::Type rankFieldTy = recTy.getTypeList ()[2 ].second ;
8228
+ mlir::Value rankFieldIndex = builder.create <fir::FieldIndexOp>(
8229
+ loc, fieldIndexType, rankFieldName, recTy,
8230
+ /* typeParams=*/ mlir::ValueRange{});
8231
+ mlir::Value rankCoord = builder.create <fir::CoordinateOp>(
8232
+ loc, builder.getRefType (rankFieldTy), res, rankFieldIndex);
8233
+ builder.create <fir::StoreOp>(loc, rank, rankCoord);
8234
+ return res;
8235
+ }
8236
+
8197
8237
// TRAILZ
8198
8238
mlir::Value IntrinsicLibrary::genTrailz (mlir::Type resultType,
8199
8239
llvm::ArrayRef<mlir::Value> args) {
0 commit comments