Skip to content

Commit 94cd946

Browse files
authored
[ZARCH] fix cgemv_n_4.c
1 parent 1aa840a commit 94cd946

File tree

1 file changed

+166
-166
lines changed

1 file changed

+166
-166
lines changed

kernel/zarch/cgemv_n_4.c

Lines changed: 166 additions & 166 deletions
Original file line number | Diff line number | Diff line change
@@ -34,107 +34,107 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434
static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
3535
{
3636
__asm__ volatile (
37-
"vlrepg %%v16,0(%5) \n\t"
38-
"vlrepg %%v17,8(%5) \n\t"
39-
"vlrepg %%v18,16(%5) \n\t"
40-
"vlrepg %%v19,24(%5) \n\t"
37+
"vlrepg %%v16,0(%5) \n\t"
38+
"vlrepg %%v17,8(%5) \n\t"
39+
"vlrepg %%v18,16(%5) \n\t"
40+
"vlrepg %%v19,24(%5) \n\t"
4141
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
42-
"vlef %%v20,4(%5),0 \n\t"
43-
"vlef %%v20,4(%5),2 \n\t"
44-
"vflcsb %%v20,%%v20 \n\t"
45-
"vlef %%v20,0(%5),1 \n\t"
46-
"vlef %%v20,0(%5),3 \n\t"
47-
48-
"vlef %%v21,12(%5),0 \n\t"
49-
"vlef %%v21,12(%5),2 \n\t"
50-
"vflcsb %%v21,%%v21 \n\t"
51-
"vlef %%v21,8(%5),1 \n\t"
52-
"vlef %%v21,8(%5),3 \n\t"
53-
54-
"vlef %%v22,20(%5),0 \n\t"
55-
"vlef %%v22,20(%5),2 \n\t"
56-
"vflcsb %%v22,%%v22 \n\t"
57-
"vlef %%v22,16(%5),1 \n\t"
58-
"vlef %%v22,16(%5),3 \n\t"
59-
60-
"vlef %%v23,28(%5),0 \n\t"
61-
"vlef %%v23,28(%5),2 \n\t"
62-
"vflcsb %%v23,%%v23 \n\t"
63-
"vlef %%v23,24(%5),1 \n\t"
64-
"vlef %%v23,24(%5),3 \n\t"
42+
"vlef %%v20,4(%5),0 \n\t"
43+
"vlef %%v20,4(%5),2 \n\t"
44+
"vflcsb %%v20,%%v20 \n\t"
45+
"vlef %%v20,0(%5),1 \n\t"
46+
"vlef %%v20,0(%5),3 \n\t"
47+
48+
"vlef %%v21,12(%5),0 \n\t"
49+
"vlef %%v21,12(%5),2 \n\t"
50+
"vflcsb %%v21,%%v21 \n\t"
51+
"vlef %%v21,8(%5),1 \n\t"
52+
"vlef %%v21,8(%5),3 \n\t"
53+
54+
"vlef %%v22,20(%5),0 \n\t"
55+
"vlef %%v22,20(%5),2 \n\t"
56+
"vflcsb %%v22,%%v22 \n\t"
57+
"vlef %%v22,16(%5),1 \n\t"
58+
"vlef %%v22,16(%5),3 \n\t"
59+
60+
"vlef %%v23,28(%5),0 \n\t"
61+
"vlef %%v23,28(%5),2 \n\t"
62+
"vflcsb %%v23,%%v23 \n\t"
63+
"vlef %%v23,24(%5),1 \n\t"
64+
"vlef %%v23,24(%5),3 \n\t"
6565
#else
66-
"vlef %%v20,0(%5),1 \n\t"
67-
"vlef %%v20,0(%5),3 \n\t"
68-
"vflcsb %%v20,%%v20 \n\t"
69-
"vlef %%v20,4(%5),0 \n\t"
70-
"vlef %%v20,4(%5),2 \n\t"
71-
72-
"vlef %%v21,8(%5),1 \n\t"
73-
"vlef %%v21,8(%5),3 \n\t"
74-
"vflcsb %%v21,%%v21 \n\t"
75-
"vlef %%v21,12(%5),0 \n\t"
76-
"vlef %%v21,12(%5),2 \n\t"
77-
78-
"vlef %%v22,16(%5),1 \n\t"
79-
"vlef %%v22,16(%5),3 \n\t"
80-
"vflcsb %%v22,%%v22 \n\t"
81-
"vlef %%v22,20(%5),0 \n\t"
82-
"vlef %%v22,20(%5),2 \n\t"
83-
84-
"vlef %%v23,24(%5),1 \n\t"
85-
"vlef %%v23,24(%5),3 \n\t"
86-
"vflcsb %%v23,%%v23 \n\t"
87-
"vlef %%v23,28(%5),0 \n\t"
88-
"vlef %%v23,28(%5),2 \n\t"
66+
"vlef %%v20,0(%5),1 \n\t"
67+
"vlef %%v20,0(%5),3 \n\t"
68+
"vflcsb %%v20,%%v20 \n\t"
69+
"vlef %%v20,4(%5),0 \n\t"
70+
"vlef %%v20,4(%5),2 \n\t"
71+
72+
"vlef %%v21,8(%5),1 \n\t"
73+
"vlef %%v21,8(%5),3 \n\t"
74+
"vflcsb %%v21,%%v21 \n\t"
75+
"vlef %%v21,12(%5),0 \n\t"
76+
"vlef %%v21,12(%5),2 \n\t"
77+
78+
"vlef %%v22,16(%5),1 \n\t"
79+
"vlef %%v22,16(%5),3 \n\t"
80+
"vflcsb %%v22,%%v22 \n\t"
81+
"vlef %%v22,20(%5),0 \n\t"
82+
"vlef %%v22,20(%5),2 \n\t"
83+
84+
"vlef %%v23,24(%5),1 \n\t"
85+
"vlef %%v23,24(%5),3 \n\t"
86+
"vflcsb %%v23,%%v23 \n\t"
87+
"vlef %%v23,28(%5),0 \n\t"
88+
"vlef %%v23,28(%5),2 \n\t"
8989
#endif
90-
"xgr %%r1,%%r1 \n\t"
91-
"srlg %%r0,%%r0,1 \n\t"
92-
"0: \n\t"
93-
"pfd 1,1024(%%r1,%1) \n\t"
94-
"pfd 1,1024(%%r1,%2) \n\t"
95-
"pfd 1,1024(%%r1,%3) \n\t"
96-
"pfd 1,1024(%%r1,%4) \n\t"
97-
"pfd 2,1024(%%r1,%6) \n\t"
98-
99-
"vlef %%v24,0(%%r1,%1),0 \n\t"
100-
"vlef %%v24,0(%%r1,%1),1 \n\t"
101-
"vlef %%v24,8(%%r1,%1),2 \n\t"
102-
"vlef %%v24,8(%%r1,%1),3 \n\t"
103-
"vlef %%v25,4(%%r1,%1),0 \n\t"
104-
"vlef %%v25,4(%%r1,%1),1 \n\t"
105-
"vlef %%v25,12(%%r1,%1),2 \n\t"
106-
"vlef %%v25,12(%%r1,%1),3 \n\t"
107-
"vlef %%v26,0(%%r1,%2),0 \n\t"
108-
"vlef %%v26,0(%%r1,%2),1 \n\t"
109-
"vlef %%v26,8(%%r1,%2),2 \n\t"
110-
"vlef %%v26,8(%%r1,%2),3 \n\t"
111-
"vlef %%v27,4(%%r1,%2),0 \n\t"
112-
"vlef %%v27,4(%%r1,%2),1 \n\t"
113-
"vlef %%v27,12(%%r1,%2),2 \n\t"
114-
"vlef %%v27,12(%%r1,%2),3 \n\t"
115-
116-
"vl %%v0,0(%%r1,%6) \n\t"
117-
"vfmasb %%v0,%%v24,%%v16,%%v0 \n\t"
118-
"vfmasb %%v0,%%v25,%%v20,%%v0 \n\t"
119-
"vfmasb %%v0,%%v26,%%v17,%%v0 \n\t"
120-
"vfmasb %%v0,%%v27,%%v21,%%v0 \n\t"
121-
122-
"vlef %%v28,0(%%r1,%1),0 \n\t"
123-
"vlef %%v28,0(%%r1,%1),1 \n\t"
124-
"vlef %%v28,8(%%r1,%1),2 \n\t"
125-
"vlef %%v28,8(%%r1,%1),3 \n\t"
126-
"vlef %%v29,4(%%r1,%1),0 \n\t"
127-
"vlef %%v29,4(%%r1,%1),1 \n\t"
128-
"vlef %%v29,12(%%r1,%1),2 \n\t"
129-
"vlef %%v29,12(%%r1,%1),3 \n\t"
130-
"vlef %%v30,0(%%r1,%2),0 \n\t"
131-
"vlef %%v30,0(%%r1,%2),1 \n\t"
132-
"vlef %%v30,8(%%r1,%2),2 \n\t"
133-
"vlef %%v30,8(%%r1,%2),3 \n\t"
134-
"vlef %%v31,4(%%r1,%2),0 \n\t"
135-
"vlef %%v31,4(%%r1,%2),1 \n\t"
136-
"vlef %%v31,12(%%r1,%2),2 \n\t"
137-
"vlef %%v31,12(%%r1,%2),3 \n\t"
90+
"xgr %%r1,%%r1 \n\t"
91+
"srlg %%r0,%0,1 \n\t"
92+
"0: \n\t"
93+
"pfd 1,1024(%%r1,%1) \n\t"
94+
"pfd 1,1024(%%r1,%2) \n\t"
95+
"pfd 1,1024(%%r1,%3) \n\t"
96+
"pfd 1,1024(%%r1,%4) \n\t"
97+
"pfd 2,1024(%%r1,%6) \n\t"
98+
99+
"vlef %%v24,0(%%r1,%1),0 \n\t"
100+
"vlef %%v24,0(%%r1,%1),1 \n\t"
101+
"vlef %%v24,8(%%r1,%1),2 \n\t"
102+
"vlef %%v24,8(%%r1,%1),3 \n\t"
103+
"vlef %%v25,4(%%r1,%1),0 \n\t"
104+
"vlef %%v25,4(%%r1,%1),1 \n\t"
105+
"vlef %%v25,12(%%r1,%1),2 \n\t"
106+
"vlef %%v25,12(%%r1,%1),3 \n\t"
107+
"vlef %%v26,0(%%r1,%2),0 \n\t"
108+
"vlef %%v26,0(%%r1,%2),1 \n\t"
109+
"vlef %%v26,8(%%r1,%2),2 \n\t"
110+
"vlef %%v26,8(%%r1,%2),3 \n\t"
111+
"vlef %%v27,4(%%r1,%2),0 \n\t"
112+
"vlef %%v27,4(%%r1,%2),1 \n\t"
113+
"vlef %%v27,12(%%r1,%2),2 \n\t"
114+
"vlef %%v27,12(%%r1,%2),3 \n\t"
115+
116+
"vl %%v0,0(%%r1,%6) \n\t"
117+
"vfmasb %%v0,%%v24,%%v16,%%v0 \n\t"
118+
"vfmasb %%v0,%%v25,%%v20,%%v0 \n\t"
119+
"vfmasb %%v0,%%v26,%%v17,%%v0 \n\t"
120+
"vfmasb %%v0,%%v27,%%v21,%%v0 \n\t"
121+
122+
"vlef %%v28,0(%%r1,%1),0 \n\t"
123+
"vlef %%v28,0(%%r1,%1),1 \n\t"
124+
"vlef %%v28,8(%%r1,%1),2 \n\t"
125+
"vlef %%v28,8(%%r1,%1),3 \n\t"
126+
"vlef %%v29,4(%%r1,%1),0 \n\t"
127+
"vlef %%v29,4(%%r1,%1),1 \n\t"
128+
"vlef %%v29,12(%%r1,%1),2 \n\t"
129+
"vlef %%v29,12(%%r1,%1),3 \n\t"
130+
"vlef %%v30,0(%%r1,%2),0 \n\t"
131+
"vlef %%v30,0(%%r1,%2),1 \n\t"
132+
"vlef %%v30,8(%%r1,%2),2 \n\t"
133+
"vlef %%v30,8(%%r1,%2),3 \n\t"
134+
"vlef %%v31,4(%%r1,%2),0 \n\t"
135+
"vlef %%v31,4(%%r1,%2),1 \n\t"
136+
"vlef %%v31,12(%%r1,%2),2 \n\t"
137+
"vlef %%v31,12(%%r1,%2),3 \n\t"
138138

139139
"vfmasb %%v0,%%v28,%%v18,%%v0 \n\t"
140140
"vfmasb %%v0,%%v29,%%v22,%%v0 \n\t"
@@ -153,56 +153,56 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
153153
static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
154154
{
155155
__asm__ volatile (
156-
"vlrepg %%v16,0(%3) \n\t"
157-
"vlrepg %%v17,8(%3) \n\t"
156+
"vlrepg %%v16,0(%3) \n\t"
157+
"vlrepg %%v17,8(%3) \n\t"
158158
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
159-
"vlef %%v18,4(%3),0 \n\t"
160-
"vlef %%v18,4(%3),2 \n\t"
161-
"vflcsb %%v18,%%v18 \n\t"
162-
"vlef %%v18,0(%3),1 \n\t"
163-
"vlef %%v18,0(%3),3 \n\t"
164-
165-
"vlef %%v19,12(%3),0 \n\t"
166-
"vlef %%v19,12(%3),2 \n\t"
167-
"vflcsb %%v19,%%v19 \n\t"
168-
"vlef %%v19,8(%3),1 \n\t"
169-
"vlef %%v19,8(%3),3 \n\t"
159+
"vlef %%v18,4(%3),0 \n\t"
160+
"vlef %%v18,4(%3),2 \n\t"
161+
"vflcsb %%v18,%%v18 \n\t"
162+
"vlef %%v18,0(%3),1 \n\t"
163+
"vlef %%v18,0(%3),3 \n\t"
164+
165+
"vlef %%v19,12(%3),0 \n\t"
166+
"vlef %%v19,12(%3),2 \n\t"
167+
"vflcsb %%v19,%%v19 \n\t"
168+
"vlef %%v19,8(%3),1 \n\t"
169+
"vlef %%v19,8(%3),3 \n\t"
170170
#else
171-
"vlef %%v18,0(%3),1 \n\t"
172-
"vlef %%v18,0(%3),3 \n\t"
173-
"vflcsb %%v18,%%v18 \n\t"
174-
"vlef %%v18,4(%3),0 \n\t"
175-
"vlef %%v18,4(%3),2 \n\t"
176-
177-
"vlef %%v19,8(%3),1 \n\t"
178-
"vlef %%v19,8(%3),3 \n\t"
179-
"vflcsb %%v19,%%v19 \n\t"
180-
"vlef %%v19,12(%3),0 \n\t"
181-
"vlef %%v19,12(%3),2 \n\t"
171+
"vlef %%v18,0(%3),1 \n\t"
172+
"vlef %%v18,0(%3),3 \n\t"
173+
"vflcsb %%v18,%%v18 \n\t"
174+
"vlef %%v18,4(%3),0 \n\t"
175+
"vlef %%v18,4(%3),2 \n\t"
176+
177+
"vlef %%v19,8(%3),1 \n\t"
178+
"vlef %%v19,8(%3),3 \n\t"
179+
"vflcsb %%v19,%%v19 \n\t"
180+
"vlef %%v19,12(%3),0 \n\t"
181+
"vlef %%v19,12(%3),2 \n\t"
182182
#endif
183-
"xgr %%r1,%%r1 \n\t"
184-
"srlg %%r0,%%r0,1 \n\t"
185-
"0: \n\t"
186-
"pfd 1,1024(%%r1,%1) \n\t"
187-
"pfd 1,1024(%%r1,%2) \n\t"
188-
"pfd 2,1024(%%r1,%4) \n\t"
189-
190-
"vlef %%v20,0(%%r1,%1),0 \n\t"
191-
"vlef %%v20,0(%%r1,%1),1 \n\t"
192-
"vlef %%v20,8(%%r1,%1),2 \n\t"
193-
"vlef %%v20,8(%%r1,%1),3 \n\t"
194-
"vlef %%v21,4(%%r1,%1),0 \n\t"
195-
"vlef %%v21,4(%%r1,%1),1 \n\t"
196-
"vlef %%v21,12(%%r1,%1),2 \n\t"
197-
"vlef %%v21,12(%%r1,%1),3 \n\t"
198-
"vlef %%v22,0(%%r1,%2),0 \n\t"
199-
"vlef %%v22,0(%%r1,%2),1 \n\t"
200-
"vlef %%v22,8(%%r1,%2),2 \n\t"
201-
"vlef %%v22,8(%%r1,%2),3 \n\t"
202-
"vlef %%v23,4(%%r1,%2),0 \n\t"
203-
"vlef %%v23,4(%%r1,%2),1 \n\t"
204-
"vlef %%v23,12(%%r1,%2),2 \n\t"
205-
"vlef %%v23,12(%%r1,%2),3 \n\t"
183+
"xgr %%r1,%%r1 \n\t"
184+
"srlg %%r0,%0,1 \n\t"
185+
"0: \n\t"
186+
"pfd 1,1024(%%r1,%1) \n\t"
187+
"pfd 1,1024(%%r1,%2) \n\t"
188+
"pfd 2,1024(%%r1,%4) \n\t"
189+
190+
"vlef %%v20,0(%%r1,%1),0 \n\t"
191+
"vlef %%v20,0(%%r1,%1),1 \n\t"
192+
"vlef %%v20,8(%%r1,%1),2 \n\t"
193+
"vlef %%v20,8(%%r1,%1),3 \n\t"
194+
"vlef %%v21,4(%%r1,%1),0 \n\t"
195+
"vlef %%v21,4(%%r1,%1),1 \n\t"
196+
"vlef %%v21,12(%%r1,%1),2 \n\t"
197+
"vlef %%v21,12(%%r1,%1),3 \n\t"
198+
"vlef %%v22,0(%%r1,%2),0 \n\t"
199+
"vlef %%v22,0(%%r1,%2),1 \n\t"
200+
"vlef %%v22,8(%%r1,%2),2 \n\t"
201+
"vlef %%v22,8(%%r1,%2),3 \n\t"
202+
"vlef %%v23,4(%%r1,%2),0 \n\t"
203+
"vlef %%v23,4(%%r1,%2),1 \n\t"
204+
"vlef %%v23,12(%%r1,%2),2 \n\t"
205+
"vlef %%v23,12(%%r1,%2),3 \n\t"
206206

207207
"vl %%v0,0(%%r1,%4) \n\t"
208208
"vfmasb %%v0,%%v20,%%v16,%%v0 \n\t"
@@ -222,34 +222,34 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
222222
static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
223223
{
224224
__asm__ volatile (
225-
"vlrepg %%v16,0(%2) \n\t"
225+
"vlrepg %%v16,0(%2) \n\t"
226226
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
227227
"vlef %%v17,4(%2),0 \n\t"
228-
"vlef %%v17,4(%2),2 \n\t"
228+
"vlef %%v17,4(%2),2 \n\t"
229229
"vflcsb %%v17,%%v17 \n\t"
230230
"vlef %%v17,0(%2),1 \n\t"
231-
"vlef %%v17,0(%2),3 \n\t"
231+
"vlef %%v17,0(%2),3 \n\t"
232232
#else
233233
"vlef %%v17,0(%2),1 \n\t"
234-
"vlef %%v17,0(%2),3 \n\t"
234+
"vlef %%v17,0(%2),3 \n\t"
235235
"vflcsb %%v17,%%v17 \n\t"
236236
"vlef %%v17,4(%2),0 \n\t"
237-
"vlef %%v17,4(%2),2 \n\t"
237+
"vlef %%v17,4(%2),2 \n\t"
238238
#endif
239239
"xgr %%r1,%%r1 \n\t"
240-
"srlg %%r0,%%r0,1 \n\t"
240+
"srlg %%r0,%0,1 \n\t"
241241
"0: \n\t"
242242
"pfd 1,1024(%%r1,%1) \n\t"
243243
"pfd 2,1024(%%r1,%3) \n\t"
244244

245-
"vlef %%v18,0(%%r1,%1),0 \n\t"
246-
"vlef %%v18,0(%%r1,%1),1 \n\t"
247-
"vlef %%v18,8(%%r1,%1),2 \n\t"
248-
"vlef %%v18,8(%%r1,%1),3 \n\t"
249-
"vlef %%v19,4(%%r1,%1),0 \n\t"
250-
"vlef %%v19,4(%%r1,%1),1 \n\t"
251-
"vlef %%v19,12(%%r1,%1),2 \n\t"
252-
"vlef %%v19,12(%%r1,%1),3 \n\t"
245+
"vlef %%v18,0(%%r1,%1),0 \n\t"
246+
"vlef %%v18,0(%%r1,%1),1 \n\t"
247+
"vlef %%v18,8(%%r1,%1),2 \n\t"
248+
"vlef %%v18,8(%%r1,%1),3 \n\t"
249+
"vlef %%v19,4(%%r1,%1),0 \n\t"
250+
"vlef %%v19,4(%%r1,%1),1 \n\t"
251+
"vlef %%v19,12(%%r1,%1),2 \n\t"
252+
"vlef %%v19,12(%%r1,%1),3 \n\t"
253253

254254
"vl %%v0,0(%%r1,%3) \n\t"
255255
"vfmasb %%v0,%%v18,%%v16,%%v0 \n\t"
@@ -268,18 +268,18 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, FLOAT al
268268
{
269269
__asm__ volatile (
270270
#if !defined(XCONJ)
271-
"vlrepf %%v0,%3 \n\t"
272-
"vlef %%v1,%4,0 \n\t"
273-
"vlef %%v1,%4,2 \n\t"
271+
"vlrepf %%v0,%3 \n\t"
272+
"vlef %%v1,%4,0 \n\t"
273+
"vlef %%v1,%4,2 \n\t"
274274
"vflcsb %%v1,%%v1 \n\t"
275-
"vlef %%v1,%4,1 \n\t"
275+
"vlef %%v1,%4,1 \n\t"
276276
"vlef %%v1,%4,3 \n\t"
277277
#else
278278
"vlef %%v0,%3,1 \n\t"
279-
"vlef %%v0,%3,3 \n\t"
279+
"vlef %%v0,%3,3 \n\t"
280280
"vflcsb %%v0,%%v0 \n\t"
281281
"vlef %%v0,%3,0 \n\t"
282-
"vlef %%v0,%3,2 \n\t"
282+
"vlef %%v0,%3,2 \n\t"
283283
"vlrepf %%v1,%4 \n\t"
284284
#endif
285285
"xgr %%r1,%%r1 \n\t"
@@ -292,7 +292,7 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, FLOAT al
292292
"vl %%v17,16(%%r1,%1) \n\t"
293293
"vl %%v18,0(%%r1,%2) \n\t"
294294
"vl %%v19,16(%%r1,%2) \n\t"
295-
"verllg %%v20,%%v16,32 \n\t"
295+
"verllg %%v20,%%v16,32 \n\t"
296296
"verllg %%v21,%%v17,32 \n\t"
297297

298298
"vfmasb %%v22,%%v16,%%v0,%%v18 \n\t"

0 commit comments

Comments
 (0)