Skip to content

Commit 5668c7b

Browse files
authored
[flang][cuda] Add more interfaces for __ldca, __ldcs, __ldlu and __ldcv (#130218)
1 parent 6fa1bfa commit 5668c7b

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed

flang/module/cudadevice.f90

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1118,6 +1118,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
11181118
!dir$ ignore_tkr (d) x
11191119
complex(8), intent(in) :: x
11201120
end function
1121+
! Specific procedure reached through the __ldca generic for a 4-element
! integer(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldca_i4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(4), intent(in) :: x(4)
  integer(4) :: y(4)
end function __ldca_i4x4
1126+
! Specific procedure reached through the __ldca generic for a 2-element
! integer(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldca_i8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(8), intent(in) :: x(2)
  integer(8) :: y(2)
end function __ldca_i8x2
1131+
! Specific procedure reached through the __ldca generic for a 2-element
! real(2) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldca_r2x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(2), intent(in) :: x(2)
  real(2) :: y(2)
end function __ldca_r2x2
1136+
! Specific procedure reached through the __ldca generic for a 4-element
! real(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldca_r4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(4), intent(in) :: x(4)
  real(4) :: y(4)
end function __ldca_r4x4
1141+
! Specific procedure reached through the __ldca generic for a 2-element
! real(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldca_r8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(8), intent(in) :: x(2)
  real(8) :: y(2)
end function __ldca_r8x2
11211146
end interface
11221147

11231148
! LDCS
@@ -1158,6 +1183,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
11581183
!dir$ ignore_tkr (d) x
11591184
complex(8), intent(in) :: x
11601185
end function
1186+
! Specific procedure reached through the __ldcs generic for a 4-element
! integer(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldcs_i4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(4), intent(in) :: x(4)
  integer(4) :: y(4)
end function __ldcs_i4x4
1191+
! Specific procedure reached through the __ldcs generic for a 2-element
! integer(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcs_i8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(8), intent(in) :: x(2)
  integer(8) :: y(2)
end function __ldcs_i8x2
1196+
! Specific procedure reached through the __ldcs generic for a 2-element
! real(2) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcs_r2x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(2), intent(in) :: x(2)
  real(2) :: y(2)
end function __ldcs_r2x2
1201+
! Specific procedure reached through the __ldcs generic for a 4-element
! real(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldcs_r4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(4), intent(in) :: x(4)
  real(4) :: y(4)
end function __ldcs_r4x4
1206+
! Specific procedure reached through the __ldcs generic for a 2-element
! real(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcs_r8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(8), intent(in) :: x(2)
  real(8) :: y(2)
end function __ldcs_r8x2
11611211
end interface
11621212

11631213
! LDLU
@@ -1198,6 +1248,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
11981248
!dir$ ignore_tkr (d) x
11991249
complex(8), intent(in) :: x
12001250
end function
1251+
! Specific procedure reached through the __ldlu generic for a 4-element
! integer(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldlu_i4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(4), intent(in) :: x(4)
  integer(4) :: y(4)
end function __ldlu_i4x4
1256+
! Specific procedure reached through the __ldlu generic for a 2-element
! integer(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldlu_i8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(8), intent(in) :: x(2)
  integer(8) :: y(2)
end function __ldlu_i8x2
1261+
! Specific procedure reached through the __ldlu generic for a 2-element
! real(2) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldlu_r2x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(2), intent(in) :: x(2)
  real(2) :: y(2)
end function __ldlu_r2x2
1266+
! Specific procedure reached through the __ldlu generic for a 4-element
! real(4) vector: takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldlu_r4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(4), intent(in) :: x(4)
  real(4) :: y(4)
end function __ldlu_r4x4
1271+
! Specific procedure reached through the __ldlu generic for a 2-element
! real(8) vector: takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldlu_r8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(8), intent(in) :: x(2)
  real(8) :: y(2)
end function __ldlu_r8x2
12011276
end interface
12021277

12031278
! LDCV
@@ -1238,6 +1313,31 @@ attributes(device) integer function match_any_syncjd(mask, val)
12381313
!dir$ ignore_tkr (d) x
12391314
complex(8), intent(in) :: x
12401315
end function
1316+
! Interface added for __ldcv covering a 4-element integer(4) vector:
! takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldcv_i4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(4), intent(in) :: x(4)
  integer(4) :: y(4)
end function __ldcv_i4x4
1321+
! Interface added for __ldcv covering a 2-element integer(8) vector:
! takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcv_i8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  integer(8), intent(in) :: x(2)
  integer(8) :: y(2)
end function __ldcv_i8x2
1326+
! Interface added for __ldcv covering a 2-element real(2) vector:
! takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcv_r2x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(2), intent(in) :: x(2)
  real(2) :: y(2)
end function __ldcv_r2x2
1331+
! Interface added for __ldcv covering a 4-element real(4) vector:
! takes x(4) and returns a result of the same type and shape.
attributes(device) pure function __ldcv_r4x4(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(4), intent(in) :: x(4)
  real(4) :: y(4)
end function __ldcv_r4x4
1336+
! Interface added for __ldcv covering a 2-element real(8) vector:
! takes x(2) and returns a result of the same type and shape.
attributes(device) pure function __ldcv_r8x2(x) result(y)
  ! Directive must stay as written; it relaxes argument matching on x
  ! (presumably the CUDA data attribute, per flang's ignore_tkr letters - confirm).
  !dir$ ignore_tkr (d) x
  real(8), intent(in) :: x(2)
  real(8) :: y(2)
end function __ldcv_r8x2
12411341
end interface
12421342

12431343
! STWB

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,3 +198,83 @@ end subroutine
198198
! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f64> to !llvm.ptr
199199
! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64
200200
! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)>
201+
202+
! Lowering test for the vector load generics on a default-integer device array.
! Every generic is applied to the rank-1 section b(i:j) and is expected to
! resolve to its *_i4x4 specific. i and j are not declared in this subroutine;
! they only serve to form the array section.
attributes(global) subroutine __ldXXi4(b)
  integer, device :: b(*)
  integer, device :: x(4)
  x(1:4) = __ldca(b(i:j))
  x = __ldcg(b(i:j))
  x = __ldcs(b(i:j))
  x(1:4) = __ldlu(b(i:j))
  x(1:4) = __ldcv(b(i:j))
end

! Expected specifics, in call order. The __ldcv directive was previously
! missing, so the last call above was never verified.
! CHECK-LABEL: func.func @_QP__ldxxi4
! CHECK: __ldca_i4x4
! CHECK: __ldcg_i4x4
! CHECK: __ldcs_i4x4
! CHECK: __ldlu_i4x4
! CHECK: __ldcv_i4x4
217+
218+
! Lowering test for the vector load generics on an integer(8) device array.
! Every generic is applied to the rank-1 section b(i:j) and is expected to
! resolve to its *_i8x2 specific. i and j are not declared in this subroutine;
! they only serve to form the array section.
attributes(global) subroutine __ldXXi8(b)
  integer(8), device :: b(*)
  integer(8), device :: x(2)
  x(1:2) = __ldca(b(i:j))
  x = __ldcg(b(i:j))
  x = __ldcs(b(i:j))
  x(1:2) = __ldlu(b(i:j))
  x(1:2) = __ldcv(b(i:j))
end

! Expected specifics, in call order. The __ldcv directive was previously
! missing, so the last call above was never verified.
! CHECK-LABEL: func.func @_QP__ldxxi8
! CHECK: __ldca_i8x2
! CHECK: __ldcg_i8x2
! CHECK: __ldcs_i8x2
! CHECK: __ldlu_i8x2
! CHECK: __ldcv_i8x2
233+
234+
! Lowering test for the vector load generics on a default-real device array.
! Every generic is applied to the rank-1 section b(i:j) and is expected to
! resolve to its *_r4x4 specific. i and j are not declared in this subroutine;
! they only serve to form the array section.
attributes(global) subroutine __ldXXr4(b)
  real, device :: b(*)
  real, device :: x(4)
  x(1:4) = __ldca(b(i:j))
  x = __ldcg(b(i:j))
  x = __ldcs(b(i:j))
  x(1:4) = __ldlu(b(i:j))
  x(1:4) = __ldcv(b(i:j))
end

! Expected specifics, in call order. The __ldcv directive was previously
! missing, so the last call above was never verified.
! CHECK-LABEL: func.func @_QP__ldxxr4
! CHECK: __ldca_r4x4
! CHECK: __ldcg_r4x4
! CHECK: __ldcs_r4x4
! CHECK: __ldlu_r4x4
! CHECK: __ldcv_r4x4
249+
250+
! Lowering test for the vector load generics on a real(2) device array.
! Every generic is applied to the rank-1 section b(i:j) and is expected to
! resolve to its *_r2x2 specific. i and j are not declared in this subroutine;
! they only serve to form the array section.
attributes(global) subroutine __ldXXr2(b)
  real(2), device :: b(*)
  real(2), device :: x(2)
  x(1:2) = __ldca(b(i:j))
  x = __ldcg(b(i:j))
  x = __ldcs(b(i:j))
  x(1:2) = __ldlu(b(i:j))
  x(1:2) = __ldcv(b(i:j))
end

! Expected specifics, in call order. The __ldcv directive was previously
! missing, so the last call above was never verified.
! CHECK-LABEL: func.func @_QP__ldxxr2
! CHECK: __ldca_r2x2
! CHECK: __ldcg_r2x2
! CHECK: __ldcs_r2x2
! CHECK: __ldlu_r2x2
! CHECK: __ldcv_r2x2
265+
266+
! Lowering test for the vector load generics on a real(8) device array.
! Every generic is applied to the rank-1 section b(i:j) and is expected to
! resolve to its *_r8x2 specific. i and j are not declared in this subroutine;
! they only serve to form the array section.
attributes(global) subroutine __ldXXr8(b)
  real(8), device :: b(*)
  real(8), device :: x(2)
  x(1:2) = __ldca(b(i:j))
  x = __ldcg(b(i:j))
  x = __ldcs(b(i:j))
  x(1:2) = __ldlu(b(i:j))
  x(1:2) = __ldcv(b(i:j))
end

! Expected specifics, in call order. The __ldcv directive was previously
! missing, so the last call above was never verified.
! CHECK-LABEL: func.func @_QP__ldxxr8
! CHECK: __ldca_r8x2
! CHECK: __ldcg_r8x2
! CHECK: __ldcs_r8x2
! CHECK: __ldlu_r8x2
! CHECK: __ldcv_r8x2

0 commit comments

Comments
 (0)