diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 4491e9f653270..baaa112f5d8c2 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1118,6 +1118,31 @@ attributes(device) integer function match_any_syncjd(mask, val) !dir$ ignore_tkr (d) x complex(8), intent(in) :: x end function + attributes(device) pure function __ldca_i4x4(x) result(y) + !dir$ ignore_tkr (d) x + integer(4), dimension(4), intent(in) :: x + integer(4), dimension(4) :: y + end function + attributes(device) pure function __ldca_i8x2(x) result(y) + !dir$ ignore_tkr (d) x + integer(8), dimension(2), intent(in) :: x + integer(8), dimension(2) :: y + end function + attributes(device) pure function __ldca_r2x2(x) result(y) + !dir$ ignore_tkr (d) x + real(2), dimension(2), intent(in) :: x + real(2), dimension(2) :: y + end function + attributes(device) pure function __ldca_r4x4(x) result(y) + !dir$ ignore_tkr (d) x + real(4), dimension(4), intent(in) :: x + real(4), dimension(4) :: y + end function + attributes(device) pure function __ldca_r8x2(x) result(y) + !dir$ ignore_tkr (d) x + real(8), dimension(2), intent(in) :: x + real(8), dimension(2) :: y + end function end interface ! LDCS @@ -1158,6 +1183,31 @@ attributes(device) integer function match_any_syncjd(mask, val) !dir$ ignore_tkr (d) x complex(8), intent(in) :: x end function + attributes(device) pure function __ldcs_i4x4(x) result(y) + !dir$ ignore_tkr (d) x + integer(4), dimension(4), intent(in) :: x + integer(4), dimension(4) :: y + end function + attributes(device) pure function __ldcs_i8x2(x) result(y) + !dir$ ignore_tkr (d) x + integer(8), dimension(2), intent(in) :: x + integer(8), dimension(2) :: y + end function + attributes(device) pure function __ldcs_r2x2(x) result(y) + !dir$ ignore_tkr (d) x + real(2), dimension(2), intent(in) :: x + real(2), dimension(2) :: y + end function + attributes(device) pure function __ldcs_r4x4(x) result(y) + !dir$ ignore_tkr (d) x + real(4), dimension(4), intent(in) :: x + real(4), dimension(4) :: y + end function + attributes(device) pure function __ldcs_r8x2(x) result(y) + !dir$ ignore_tkr (d) x + real(8), dimension(2), intent(in) :: x + real(8), dimension(2) :: y + end function end interface ! LDLU @@ -1198,6 +1248,31 @@ attributes(device) integer function match_any_syncjd(mask, val) !dir$ ignore_tkr (d) x complex(8), intent(in) :: x end function + attributes(device) pure function __ldlu_i4x4(x) result(y) + !dir$ ignore_tkr (d) x + integer(4), dimension(4), intent(in) :: x + integer(4), dimension(4) :: y + end function + attributes(device) pure function __ldlu_i8x2(x) result(y) + !dir$ ignore_tkr (d) x + integer(8), dimension(2), intent(in) :: x + integer(8), dimension(2) :: y + end function + attributes(device) pure function __ldlu_r2x2(x) result(y) + !dir$ ignore_tkr (d) x + real(2), dimension(2), intent(in) :: x + real(2), dimension(2) :: y + end function + attributes(device) pure function __ldlu_r4x4(x) result(y) + !dir$ ignore_tkr (d) x + real(4), dimension(4), intent(in) :: x + real(4), dimension(4) :: y + end function + attributes(device) pure function __ldlu_r8x2(x) result(y) + !dir$ ignore_tkr (d) x + real(8), dimension(2), intent(in) :: x + real(8), dimension(2) :: y + end function end interface ! LDCV @@ -1238,6 +1313,31 @@ attributes(device) integer function match_any_syncjd(mask, val) !dir$ ignore_tkr (d) x complex(8), intent(in) :: x end function + attributes(device) pure function __ldcv_i4x4(x) result(y) + !dir$ ignore_tkr (d) x + integer(4), dimension(4), intent(in) :: x + integer(4), dimension(4) :: y + end function + attributes(device) pure function __ldcv_i8x2(x) result(y) + !dir$ ignore_tkr (d) x + integer(8), dimension(2), intent(in) :: x + integer(8), dimension(2) :: y + end function + attributes(device) pure function __ldcv_r2x2(x) result(y) + !dir$ ignore_tkr (d) x + real(2), dimension(2), intent(in) :: x + real(2), dimension(2) :: y + end function + attributes(device) pure function __ldcv_r4x4(x) result(y) + !dir$ ignore_tkr (d) x + real(4), dimension(4), intent(in) :: x + real(4), dimension(4) :: y + end function + attributes(device) pure function __ldcv_r8x2(x) result(y) + !dir$ ignore_tkr (d) x + real(8), dimension(2), intent(in) :: x + real(8), dimension(2) :: y + end function end interface ! STWB diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index c651d34c55093..5f39f78f8ecae 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -198,3 +198,83 @@ end subroutine ! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref to !llvm.ptr ! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64 ! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)> + +attributes(global) subroutine __ldXXi4(b) + integer, device :: b(*) + integer, device :: x(4) + x(1:4) = __ldca(b(i:j)) + x = __ldcg(b(i:j)) + x = __ldcs(b(i:j)) + x(1:4) = __ldlu(b(i:j)) + x(1:4) = __ldcv(b(i:j)) +end + +! CHECK-LABEL: func.func @_QP__ldxxi4 +! CHECK: __ldca_i4x4 +! CHECK: __ldcg_i4x4 +! CHECK: __ldcs_i4x4 +! CHECK: __ldlu_i4x4 + +attributes(global) subroutine __ldXXi8(b) + integer(8), device :: b(*) + integer(8), device :: x(2) + x(1:2) = __ldca(b(i:j)) + x = __ldcg(b(i:j)) + x = __ldcs(b(i:j)) + x(1:2) = __ldlu(b(i:j)) + x(1:2) = __ldcv(b(i:j)) +end + +! CHECK-LABEL: func.func @_QP__ldxxi8 +! CHECK: __ldca_i8x2 +! CHECK: __ldcg_i8x2 +! CHECK: __ldcs_i8x2 +! CHECK: __ldlu_i8x2 + +attributes(global) subroutine __ldXXr4(b) + real, device :: b(*) + real, device :: x(4) + x(1:4) = __ldca(b(i:j)) + x = __ldcg(b(i:j)) + x = __ldcs(b(i:j)) + x(1:4) = __ldlu(b(i:j)) + x(1:4) = __ldcv(b(i:j)) +end + +! CHECK-LABEL: func.func @_QP__ldxxr4 +! CHECK: __ldca_r4x4 +! CHECK: __ldcg_r4x4 +! CHECK: __ldcs_r4x4 +! CHECK: __ldlu_r4x4 + +attributes(global) subroutine __ldXXr2(b) + real(2), device :: b(*) + real(2), device :: x(2) + x(1:2) = __ldca(b(i:j)) + x = __ldcg(b(i:j)) + x = __ldcs(b(i:j)) + x(1:2) = __ldlu(b(i:j)) + x(1:2) = __ldcv(b(i:j)) +end + +! CHECK-LABEL: func.func @_QP__ldxxr2 +! CHECK: __ldca_r2x2 +! CHECK: __ldcg_r2x2 +! CHECK: __ldcs_r2x2 +! CHECK: __ldlu_r2x2 + +attributes(global) subroutine __ldXXr8(b) + real(8), device :: b(*) + real(8), device :: x(2) + x(1:2) = __ldca(b(i:j)) + x = __ldcg(b(i:j)) + x = __ldcs(b(i:j)) + x(1:2) = __ldlu(b(i:j)) + x(1:2) = __ldcv(b(i:j)) +end + +! CHECK-LABEL: func.func @_QP__ldxxr8 +! CHECK: __ldca_r8x2 +! CHECK: __ldcg_r8x2 +! CHECK: __ldcs_r8x2 +! CHECK: __ldlu_r8x2