@@ -932,6 +932,7 @@ static constexpr IntrinsicHandler handlers[]{
932932     {{{" count"  , asAddr}, {" count_rate"  , asAddr}, {" count_max"  , asAddr}}},
933933     /* isElemental=*/ false },
934934    {" tand"  , &I::genTand},
935+     {" this_grid"  , &I::genThisGrid, {}, /* isElemental=*/ false },
935936    {" threadfence"  , &I::genThreadFence, {}, /* isElemental=*/ false },
936937    {" threadfence_block"  , &I::genThreadFenceBlock, {}, /* isElemental=*/ false },
937938    {" threadfence_system"  , &I::genThreadFenceSystem, {}, /* isElemental=*/ false },
@@ -8109,6 +8110,90 @@ mlir::Value IntrinsicLibrary::genTand(mlir::Type resultType,
81098110  return  getRuntimeCallGenerator (" tan"  , ftype)(builder, loc, {arg});
81108111}
81118112
8113+ //  THIS_GRID
8114+ mlir::Value IntrinsicLibrary::genThisGrid (mlir::Type resultType,
8115+                                           llvm::ArrayRef<mlir::Value> args) {
8116+   assert (args.size () == 0 );
8117+   auto  recTy = mlir::cast<fir::RecordType>(resultType);
8118+   assert (recTy && " RecordType expepected"  );
8119+   mlir::Value res = builder.create <fir::AllocaOp>(loc, resultType);
8120+   mlir::Type i32Ty = builder.getI32Type ();
8121+ 
8122+   mlir::Value threadIdX = builder.create <mlir::NVVM::ThreadIdXOp>(loc, i32Ty);
8123+   mlir::Value threadIdY = builder.create <mlir::NVVM::ThreadIdYOp>(loc, i32Ty);
8124+   mlir::Value threadIdZ = builder.create <mlir::NVVM::ThreadIdZOp>(loc, i32Ty);
8125+ 
8126+   mlir::Value blockIdX = builder.create <mlir::NVVM::BlockIdXOp>(loc, i32Ty);
8127+   mlir::Value blockIdY = builder.create <mlir::NVVM::BlockIdYOp>(loc, i32Ty);
8128+   mlir::Value blockIdZ = builder.create <mlir::NVVM::BlockIdZOp>(loc, i32Ty);
8129+ 
8130+   mlir::Value blockDimX = builder.create <mlir::NVVM::BlockDimXOp>(loc, i32Ty);
8131+   mlir::Value blockDimY = builder.create <mlir::NVVM::BlockDimYOp>(loc, i32Ty);
8132+   mlir::Value blockDimZ = builder.create <mlir::NVVM::BlockDimZOp>(loc, i32Ty);
8133+   mlir::Value gridDimX = builder.create <mlir::NVVM::GridDimXOp>(loc, i32Ty);
8134+   mlir::Value gridDimY = builder.create <mlir::NVVM::GridDimYOp>(loc, i32Ty);
8135+   mlir::Value gridDimZ = builder.create <mlir::NVVM::GridDimZOp>(loc, i32Ty);
8136+ 
8137+   //  this_grid.size = ((blockDim.z * gridDim.z) * (blockDim.y * gridDim.y)) *
8138+   //  (blockDim.x * gridDim.x);
8139+   mlir::Value resZ =
8140+       builder.create <mlir::arith::MulIOp>(loc, blockDimZ, gridDimZ);
8141+   mlir::Value resY =
8142+       builder.create <mlir::arith::MulIOp>(loc, blockDimY, gridDimY);
8143+   mlir::Value resX =
8144+       builder.create <mlir::arith::MulIOp>(loc, blockDimX, gridDimX);
8145+   mlir::Value resZY = builder.create <mlir::arith::MulIOp>(loc, resZ, resY);
8146+   mlir::Value size = builder.create <mlir::arith::MulIOp>(loc, resZY, resX);
8147+ 
8148+   //  tmp = ((blockIdx.z * gridDim.y * gridDim.x) + (blockIdx.y * gridDim.x)) +
8149+   //    blockIdx.x;
8150+   //  this_group.rank = tmp * ((blockDim.x * blockDim.y) * blockDim.z) +
8151+   //    ((threadIdx.z * blockDim.y) * blockDim.x) +
8152+   //    (threadIdx.y * blockDim.x) + threadIdx.x + 1;
8153+   mlir::Value r1 = builder.create <mlir::arith::MulIOp>(loc, blockIdZ, gridDimY);
8154+   mlir::Value r2 = builder.create <mlir::arith::MulIOp>(loc, r1, gridDimX);
8155+   mlir::Value r3 = builder.create <mlir::arith::MulIOp>(loc, blockIdY, gridDimX);
8156+   mlir::Value r2r3 = builder.create <mlir::arith::AddIOp>(loc, r2, r3);
8157+   mlir::Value tmp = builder.create <mlir::arith::AddIOp>(loc, r2r3, blockIdX);
8158+ 
8159+   mlir::Value bXbY =
8160+       builder.create <mlir::arith::MulIOp>(loc, blockDimX, blockDimY);
8161+   mlir::Value bXbYbZ =
8162+       builder.create <mlir::arith::MulIOp>(loc, bXbY, blockDimZ);
8163+   mlir::Value tZbY =
8164+       builder.create <mlir::arith::MulIOp>(loc, threadIdZ, blockDimY);
8165+   mlir::Value tZbYbX =
8166+       builder.create <mlir::arith::MulIOp>(loc, tZbY, blockDimX);
8167+   mlir::Value tYbX =
8168+       builder.create <mlir::arith::MulIOp>(loc, threadIdY, blockDimX);
8169+   mlir::Value rank = builder.create <mlir::arith::MulIOp>(loc, tmp, bXbYbZ);
8170+   rank = builder.create <mlir::arith::AddIOp>(loc, rank, tZbYbX);
8171+   rank = builder.create <mlir::arith::AddIOp>(loc, rank, tYbX);
8172+   rank = builder.create <mlir::arith::AddIOp>(loc, rank, threadIdX);
8173+   mlir::Value one = builder.createIntegerConstant (loc, i32Ty, 1 );
8174+   rank = builder.create <mlir::arith::AddIOp>(loc, rank, one);
8175+ 
8176+   auto  sizeFieldName = recTy.getTypeList ()[1 ].first ;
8177+   mlir::Type sizeFieldTy = recTy.getTypeList ()[1 ].second ;
8178+   mlir::Type fieldIndexType = fir::FieldType::get (resultType.getContext ());
8179+   mlir::Value sizeFieldIndex = builder.create <fir::FieldIndexOp>(
8180+       loc, fieldIndexType, sizeFieldName, recTy,
8181+       /* typeParams=*/  mlir::ValueRange{});
8182+   mlir::Value sizeCoord = builder.create <fir::CoordinateOp>(
8183+       loc, builder.getRefType (sizeFieldTy), res, sizeFieldIndex);
8184+   builder.create <fir::StoreOp>(loc, size, sizeCoord);
8185+ 
8186+   auto  rankFieldName = recTy.getTypeList ()[2 ].first ;
8187+   mlir::Type rankFieldTy = recTy.getTypeList ()[2 ].second ;
8188+   mlir::Value rankFieldIndex = builder.create <fir::FieldIndexOp>(
8189+       loc, fieldIndexType, rankFieldName, recTy,
8190+       /* typeParams=*/  mlir::ValueRange{});
8191+   mlir::Value rankCoord = builder.create <fir::CoordinateOp>(
8192+       loc, builder.getRefType (rankFieldTy), res, rankFieldIndex);
8193+   builder.create <fir::StoreOp>(loc, rank, rankCoord);
8194+   return  res;
8195+ }
8196+ 
81128197//  TRAILZ
81138198mlir::Value IntrinsicLibrary::genTrailz (mlir::Type resultType,
81148199                                        llvm::ArrayRef<mlir::Value> args) {
0 commit comments