@@ -13,12 +13,8 @@ subroutine gemm_mnk(C, A, B, M, K, N) BIND(C, name="gemm_mnk")
1313 real (C_double), dimension (K, N), intent (in ) :: B
1414 integer (C_long) :: mm, kk, nn
1515 C = 0.0
16- do concurrent(mm = 1 :M)
17- do concurrent(nn = 1 :N)
18- do concurrent(kk = 1 :K)
19- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
20- end do
21- end do
16+ do concurrent(mm = 1 :M, nn = 1 :N, kk = 1 :K)
17+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
2218 end do
2319 end subroutine gemm_mnk
2420 subroutine gemm_mkn (C , A , B , M , K , N ) BIND(C, name= " gemm_mkn" )
@@ -28,12 +24,8 @@ subroutine gemm_mkn(C, A, B, M, K, N) BIND(C, name="gemm_mkn")
2824 real (C_double), dimension (K, N), intent (in ) :: B
2925 integer (C_long) :: mm, kk, nn
3026 C = 0.0
31- do concurrent(mm = 1 :M)
32- do concurrent(kk = 1 :K)
33- do concurrent(nn = 1 :N)
34- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
35- end do
36- end do
27+ do concurrent(mm = 1 :M, kk = 1 :K, nn = 1 :N)
28+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
3729 end do
3830 end subroutine gemm_mkn
3931 subroutine gemm_nmk (C , A , B , M , K , N ) BIND(C, name= " gemm_nmk" )
@@ -43,12 +35,8 @@ subroutine gemm_nmk(C, A, B, M, K, N) BIND(C, name="gemm_nmk")
4335 real (C_double), dimension (K, N), intent (in ) :: B
4436 integer (C_long) :: mm, kk, nn
4537 C = 0.0
46- do concurrent(nn = 1 :N)
47- do concurrent(mm = 1 :M)
48- do concurrent(kk = 1 :K)
49- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
50- end do
51- end do
38+ do concurrent(nn = 1 :N, mm = 1 :M, kk = 1 :K)
39+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
5240 end do
5341 end subroutine gemm_nmk
5442 subroutine gemm_nkm (C , A , B , M , K , N ) BIND(C, name= " gemm_nkm" )
@@ -58,12 +46,8 @@ subroutine gemm_nkm(C, A, B, M, K, N) BIND(C, name="gemm_nkm")
5846 real (C_double), dimension (K, N), intent (in ) :: B
5947 integer (C_long) :: mm, kk, nn
6048 C = 0.0
61- do concurrent(kk = 1 :K)
62- do concurrent(nn = 1 :N)
63- do concurrent(mm = 1 :M)
64- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
65- end do
66- end do
49+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
50+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
6751 end do
6852 end subroutine gemm_nkm
6953 subroutine gemm_kmn (C , A , B , M , K , N ) BIND(C, name= " gemm_kmn" )
@@ -73,12 +57,8 @@ subroutine gemm_kmn(C, A, B, M, K, N) BIND(C, name="gemm_kmn")
7357 real (C_double), dimension (K, N), intent (in ) :: B
7458 integer (C_long) :: mm, kk, nn
7559 C = 0.0
76- do concurrent(kk = 1 :K)
77- do concurrent(mm = 1 :M)
78- do concurrent(nn = 1 :N)
79- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
80- end do
81- end do
60+ do concurrent(kk = 1 :K, mm = 1 :M, nn = 1 :N)
61+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
8262 end do
8363 end subroutine gemm_kmn
8464 subroutine gemm_knm (C , A , B , M , K , N ) BIND(C, name= " gemm_knm" )
@@ -88,12 +68,8 @@ subroutine gemm_knm(C, A, B, M, K, N) BIND(C, name="gemm_knm")
8868 real (C_double), dimension (K, N), intent (in ) :: B
8969 integer (C_long) :: mm, kk, nn
9070 C = 0.0
91- do concurrent(kk = 1 :K)
92- do concurrent(nn = 1 :N)
93- do concurrent(mm = 1 :M)
94- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
95- end do
96- end do
71+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
72+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
9773 end do
9874 end subroutine gemm_knm
9975 subroutine gemmbuiltin (C , A , B , M , K , N ) BIND(C, name= " gemmbuiltin" )
@@ -110,12 +86,8 @@ subroutine AtmulB(C, A, B, M, K, N) BIND(C, name="AtmulB")
11086 real (C_double), dimension (K, N), intent (in ) :: B
11187 integer (C_long) :: mm, kk, nn
11288 C = 0.0
113- do concurrent(nn = 1 :N)
114- do concurrent(mm = 1 :M)
115- do concurrent(kk = 1 :K)
116- C(mm,nn) = C(mm,nn) + A(kk,mm) * B(kk,nn)
117- end do
118- end do
89+ do concurrent(nn = 1 :N, mm = 1 :M, kk = 1 :K)
90+ C(mm,nn) = C(mm,nn) + A(kk,mm) * B(kk,nn)
11991 end do
12092 end subroutine AtmulB
12193 subroutine AtmulBbuiltin (C , A , B , M , K , N ) BIND(C, name= " AtmulBbuiltin" )
@@ -132,12 +104,8 @@ subroutine AmulBt(C, A, B, M, K, N) BIND(C, name="AmulBt")
132104 real (C_double), dimension (N, K), intent (in ) :: B
133105 integer (C_long) :: mm, kk, nn
134106 C = 0.0
135- do concurrent(kk = 1 :K)
136- do concurrent(nn = 1 :N)
137- do concurrent(mm = 1 :M)
138- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(nn,kk)
139- end do
140- end do
107+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
108+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(nn,kk)
141109 end do
142110 end subroutine AmulBt
143111 subroutine AmulBtbuiltin (C , A , B , M , K , N ) BIND(C, name= " AmulBtbuiltin" )
@@ -154,12 +122,8 @@ subroutine AtmulBt(C, A, B, M, K, N) BIND(C, name="AtmulBt")
154122 real (C_double), dimension (N, K), intent (in ) :: B
155123 integer (C_long) :: mm, kk, nn
156124 C = 0.0
157- do concurrent(nn = 1 :N)
158- do concurrent(kk = 1 :K)
159- do concurrent(mm = 1 :M)
160- C(mm,nn) = C(mm,nn) + A(kk,mm) * B(nn,kk)
161- end do
162- end do
125+ do concurrent(nn = 1 :N, kk = 1 :K, mm = 1 :M)
126+ C(mm,nn) = C(mm,nn) + A(kk,mm) * B(nn,kk)
163127 end do
164128 end subroutine AtmulBt
165129 subroutine AtmulBtbuiltin (C , A , B , M , K , N ) BIND(C, name= " AtmulBtbuiltin" )
@@ -194,10 +158,8 @@ subroutine dot3(s, x, A, y, M, N) BIND(C, name="dot3")
194158 real (C_double), intent (in ) :: x(M), A(M,N), y(N)
195159 real (C_double), intent (out ) :: s
196160 integer (C_long) :: mm, nn
197- do concurrent(nn = 1 :N)
198- do concurrent(mm = 1 :M)
199- s = s + x(mm) * A(mm, nn) * y(nn)
200- end do
161+ do concurrent(nn = 1 :N, mm = 1 :M)
162+ s = s + x(mm) * A(mm, nn) * y(nn)
201163 end do
202164 end subroutine dot3
203165 ! GCC$ builtin (exp) attributes simd (notinbranch) if('x86_64')
@@ -226,10 +188,8 @@ subroutine gemv(y, A, x, M, K) BIND(C, name="gemv")
226188 real (C_double), dimension (M), intent (out ) :: y
227189 integer (C_long) :: mm, kk
228190 y = 0.0
229- do concurrent(kk = 1 :K)
230- do concurrent(mm = 1 :M)
231- y(mm) = y(mm) + A(mm,kk) * x(kk)
232- end do
191+ do concurrent(kk = 1 :K, mm = 1 :M)
192+ y(mm) = y(mm) + A(mm,kk) * x(kk)
233193 end do
234194 end subroutine gemv
235195 subroutine gemvbuiltin (y , A , x , M , K ) BIND(C, name= " gemvbuiltin" )
@@ -244,12 +204,9 @@ subroutine Atmulvb(y, A, x, M, K) BIND(C, name="Atmulvb")
244204 real (C_double), dimension (M), intent (out ) :: y
245205 integer (C_long) :: mm, kk
246206 real (C_double) :: ymm
247- do concurrent(mm = 1 :M)
248- ymm = 0
249- do concurrent(kk = 1 :K)
250- ymm = ymm + A(kk,mm) * x(kk)
251- end do
252- y(mm) = ymm
207+ y = 0
208+ do concurrent(mm = 1 :M, kk = 1 :K)
209+ y(mm) = y(mm) + A(kk,mm) * x(kk)
253210 end do
254211 end subroutine Atmulvb
255212 subroutine Atmulvbbuiltin (y , A , x , M , K ) BIND(C, name= " Atmulvbbuiltin" )
@@ -266,22 +223,18 @@ subroutine unscaledvar(s, A, x, M, N) BIND(C, name="unscaledvar")
266223 integer (C_long) :: mm, nn
267224 real (C_double) :: d
268225 s = 0.0
269- do concurrent(nn = 1 :N)
270- do concurrent(mm = 1 :M)
271- d = A(mm,nn) - x(mm)
272- s(mm) = s(mm) + d * d
273- end do
226+ do concurrent(nn = 1 :N, mm = 1 :M)
227+ d = A(mm,nn) - x(mm)
228+ s(mm) = s(mm) + d * d
274229 end do
275230 end subroutine unscaledvar
276231 subroutine aplusBc (D , a , B , c , M , N ) BIND(C, name= " aplusBc" )
277232 integer (C_long), intent (in ) :: M, N
278233 real (C_double), intent (in ) :: a(M), B(M,N), c(N)
279234 real (C_double), dimension (M,N), intent (out ) :: D
280235 integer (C_long) :: mm, nn
281- do concurrent(nn = 1 :N)
282- do concurrent(mm = 1 :M)
283- D(mm,nn) = a(mm) + B(mm,nn) * c(nn)
284- end do
236+ do concurrent(nn = 1 :N, mm = 1 :M)
237+ D(mm,nn) = a(mm) + B(mm,nn) * c(nn)
285238 end do
286239 end subroutine aplusBc
287240 subroutine OLSlp (lp , y , X , b , N , P ) BIND(C, name= " OLSlp" )
@@ -299,15 +252,25 @@ subroutine OLSlp(lp, y, X, b, N, P) BIND(C, name="OLSlp")
299252 lp = lp + d* d
300253 end do
301254 end subroutine OLSlp
255+ subroutine OLSlpsplit (lp , y , X , b , N , P ) BIND(C, name= " OLSlpsplit" )
256+ integer (C_long), intent (in ) :: N, P
257+ real (C_double), intent (in ) :: y(N), X(N, P), b(P)
258+ real (C_double), intent (out ) :: lp
259+ integer (C_long) :: nn, pp
260+ real (C_double) :: d(N)
261+ d = y
262+ do concurrent(nn = 1 :N, pp = 1 :P)
263+ d(nn) = d(nn) - X(nn,pp) * b(pp)
264+ end do
265+ lp = dot_product (d, d)
266+ end subroutine OLSlpsplit
302267 subroutine AplusAt (B , A , N ) BIND(C, name= " AplusAt" )
303268 integer (C_long), intent (in ) :: N
304269 real (C_double), dimension (N,N), intent (out ) :: B
305270 real (C_double), dimension (N,N), intent (in ) :: A
306271 integer (C_long) :: i, j
307- do concurrent(i = 1 :N)
308- do concurrent(j = 1 :N)
309- B(j,i) = A(j,i) + A(i,j)
310- end do
272+ do concurrent(i = 1 :N, j = 1 :N)
273+ B(j,i) = A(j,i) + A(i,j)
311274 end do
312275 end subroutine AplusAt
313276 subroutine AplusAtbuiltin (B , A , N ) BIND(C, name= " AplusAtbuiltin" )
0 commit comments