
Commit ce30ee5

[flang][cuda] Add gpu.launch to device context (#123105)
`gpu.launch` should also be considered device context.
1 parent 2bb5ca5 commit ce30ee5
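
For context, here is a minimal sketch of how the `inDeviceContext` helper in CUFOpConversion.cpp reads once this change is applied. Only the lines visible in the hunk below are confirmed by this commit; the include paths, the leading `cuf::KernelOp` check, and the attribute comparison at the end are reconstructed assumptions, not verbatim from the file.

// Sketch of flang/lib/Optimizer/Transforms/CUFOpConversion.cpp after this
// patch. Parts outside the hunk below (includes, the cuf::KernelOp check,
// and the final attribute comparison) are assumptions, not verbatim source.
#include "flang/Optimizer/Dialect/CUF/CUFOps.h" // assumed include path
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/Operation.h"

// Returns true when `op` is nested in a region that executes on the device:
// a cuf.kernel, a gpu.func, a gpu.launch (the case added by this commit), or
// a func.func carrying a non-host cuf proc attribute.
static bool inDeviceContext(mlir::Operation *op) {
  if (op->getParentOfType<cuf::KernelOp>()) // assumed: not visible in the hunk
    return true;
  if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
    return true;
  if (auto funcOp = op->getParentOfType<mlir::gpu::LaunchOp>()) // new in this commit
    return true;
  if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) {
    if (auto cudaProcAttr =
            funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
                cuf::getProcAttrName())) // assumed: continuation not visible in the hunk
      return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
             cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
  }
  return false;
}

With this predicate returning true inside `gpu.launch` regions, the pass leaves device-global accesses in the launch body alone instead of rewriting them into host-side GetDeviceAddress lookups, which is what the new test below checks.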

File tree

2 files changed, 27 insertions(+), 0 deletions(-)

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp

Lines changed: 2 additions & 0 deletions
@@ -224,6 +224,8 @@ static bool inDeviceContext(mlir::Operation *op) {
     return true;
   if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
     return true;
+  if (auto funcOp = op->getParentOfType<mlir::gpu::LaunchOp>())
+    return true;
   if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) {
     if (auto cudaProcAttr =
             funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(

flang/test/Fir/CUDA/cuda-global-addr.mlir

Lines changed: 25 additions & 0 deletions
@@ -65,3 +65,28 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> :
 // There is no symbol for it and the call would result into an unresolved reference.
 // CHECK-NOT: fir.call {{.*}}GetDeviceAddress
 
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+fir.global @_QMmod1Eadev {data_attr = #cuf.cuda<device>} : !fir.array<10xi32> {
+  %0 = fir.zero_bits !fir.array<10xi32>
+  fir.has_value %0 : !fir.array<10xi32>
+}
+func.func @_QQmain() attributes {fir.bindc_name = "test"} {
+  %dim = arith.constant 1 : index
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %dim, %grid_y = %dim, %grid_z = %dim)
+             threads(%tx, %ty, %tz) in (%block_x = %dim, %block_y = %dim, %block_z = %dim) {
+    %c10 = arith.constant 10 : index
+    %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+    %3 = fir.address_of(@_QMmod1Eadev) : !fir.ref<!fir.array<10xi32>>
+    %4 = fir.declare %3(%1) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+    gpu.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: gpu.launch
+// CHECK-NOT: fir.call {{.*}}GetDeviceAddress
+
+}
