Skip to content

Commit 5082fe4

Browse files
authored
Merge pull request #1564 from martin-frbg/issue1563
Revert changes from PR#1419
2 parents 9a400b7 + 7a7619a commit 5082fe4

File tree

8 files changed, with 147 additions and 84 deletions.

kernel/generic/trmm_ltcopy_2.c

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -116,22 +116,34 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
116116
if (m & 1) {
117117

118118
if (X > posY) {
119-
/* ao1 += 1;
120-
ao2 += 1; */
119+
ao1 += 1;
120+
ao2 += 1;
121121
b += 2;
122122
} else
123-
#ifdef UNIT
124123
if (X < posY) {
125-
#endif
126-
b[ 0] = *(ao1 + 0);
127-
#ifdef UNIT
124+
data01 = *(ao1 + 0);
125+
data02 = *(ao1 + 1);
126+
127+
b[ 0] = data01;
128+
b[ 1] = data02;
129+
ao1 += lda;
130+
b += 2;
128131
} else {
132+
#ifdef UNIT
133+
data02 = *(ao1 + 1);
129134

130135
b[ 0] = ONE;
136+
b[ 1] = data02;
137+
#else
138+
data01 = *(ao1 + 0);
139+
data02 = *(ao1 + 1);
140+
141+
b[ 0] = data01;
142+
b[ 1] = data02;
143+
#endif
144+
ao1 += 2;
145+
b += 2;
131146
}
132-
#endif
133-
b[ 1] = *(ao1 + 1);
134-
b += 2;
135147
}
136148

137149
posY += 2;
@@ -178,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
178190
} while (i > 0);
179191
}
180192

181-
// posY += 1;
193+
posY += 1;
182194
}
183195

184196
return 0;

kernel/generic/trmm_utcopy_16.c

Lines changed: 37 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
518518
i = (m & 15);
519519
if (i > 0) {
520520
if (X < posY) {
521-
/* a01 += i;
521+
a01 += i;
522522
a02 += i;
523523
a03 += i;
524524
a04 += i;
@@ -533,7 +533,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
533533
a13 += i;
534534
a14 += i;
535535
a15 += i;
536-
a16 += i; */
536+
a16 += i;
537537
b += 16 * i;
538538
} else
539539
if (X > posY) {
@@ -1130,14 +1130,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
11301130
i = (m & 7);
11311131
if (i > 0) {
11321132
if (X < posY) {
1133-
/* a01 += i;
1133+
a01 += i;
11341134
a02 += i;
11351135
a03 += i;
11361136
a04 += i;
11371137
a05 += i;
11381138
a06 += i;
11391139
a07 += i;
1140-
a08 += i; */
1140+
a08 += i;
11411141
b += 8 * i;
11421142
} else
11431143
if (X > posY) {
@@ -1156,13 +1156,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
11561156
b += 8;
11571157
}
11581158

1159-
/* a02 += i * lda;
1159+
a02 += i * lda;
11601160
a03 += i * lda;
11611161
a04 += i * lda;
11621162
a05 += i * lda;
11631163
a06 += i * lda;
11641164
a07 += i * lda;
1165-
a08 += i * lda; */
1165+
a08 += i * lda;
11661166
} else {
11671167
#ifdef UNIT
11681168
b[ 0] = ONE;
@@ -1371,10 +1371,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
13711371
i = (m & 3);
13721372
if (i > 0) {
13731373
if (X < posY) {
1374-
/* a01 += i;
1374+
a01 += i;
13751375
a02 += i;
13761376
a03 += i;
1377-
a04 += i; */
1377+
a04 += i;
13781378
b += 4 * i;
13791379
} else
13801380
if (X > posY) {
@@ -1387,9 +1387,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
13871387
a01 += lda;
13881388
b += 4;
13891389
}
1390-
/* a02 += lda;
1390+
a02 += lda;
13911391
a03 += lda;
1392-
a04 += lda; */
1392+
a04 += lda;
13931393
} else {
13941394

13951395
#ifdef UNIT
@@ -1487,19 +1487,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
14871487
if (X < posY) {
14881488
a01 ++;
14891489
a02 ++;
1490-
} else {
1491-
#ifdef UNIT
1490+
b += 2;
1491+
} else
14921492
if (X > posY) {
1493-
#endif
14941493
b[ 0] = *(a01 + 0);
1495-
#ifdef UNIT
1494+
b[ 1] = *(a01 + 1);
1495+
a01 += lda;
1496+
b += 2;
14961497
} else {
1498+
#ifdef UNIT
14971499
b[ 0] = ONE;
1498-
}
1500+
b[ 1] = *(a01 + 1);
1501+
#else
1502+
b[ 0] = *(a01 + 0);
1503+
b[ 1] = *(a01 + 1);
14991504
#endif
1500-
b[ 1] = *(a01 + 1);
1501-
}
1502-
b += 2;
1505+
b += 2;
1506+
}
15031507
}
15041508
posY += 2;
15051509
}
@@ -1518,25 +1522,28 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
15181522
if (i > 0) {
15191523
do {
15201524
if (X < posY) {
1521-
a01 ++;
1522-
} else {
1523-
#ifdef UNIT
1525+
a01 += 1;
1526+
b ++;
1527+
} else
15241528
if (X > posY) {
1525-
#endif
15261529
b[ 0] = *(a01 + 0);
1527-
#ifdef UNIT
1530+
a01 += lda;
1531+
b ++;
15281532
} else {
1533+
#ifdef UNIT
15291534
b[ 0] = ONE;
1530-
}
1535+
#else
1536+
b[ 0] = *(a01 + 0);
15311537
#endif
1532-
a01 += lda;
1533-
}
1534-
b ++;
1535-
X ++;
1536-
i --;
1538+
a01 += lda;
1539+
b ++;
1540+
}
1541+
1542+
X += 1;
1543+
i --;
15371544
} while (i > 0);
15381545
}
1539-
// posY += 1;
1546+
posY += 1;
15401547
}
15411548

15421549
return 0;

kernel/generic/trmm_utcopy_2.c

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
117117
if (m & 1) {
118118

119119
if (X < posY) {
120-
/* ao1 += 1;
121-
ao2 += 1; */
120+
ao1 += 1;
121+
ao2 += 1;
122122
b += 2;
123123
} else
124124
if (X > posY) {
@@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
127127

128128
b[ 0] = data01;
129129
b[ 1] = data02;
130-
// ao1 += lda;
130+
ao1 += lda;
131131
b += 2;
132132
} else {
133133
#ifdef UNIT
@@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
139139
b[ 0] = data01;
140140
b[ 1] = ZERO;
141141
#endif
142-
// ao1 += lda;
142+
ao1 += lda;
143143
b += 2;
144144
}
145145
}
@@ -161,18 +161,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
161161
i = m;
162162
if (m > 0) {
163163
do {
164+
if (X < posY) {
165+
b += 1;
166+
ao1 += 1;
167+
} else
168+
if (X > posY) {
169+
data01 = *(ao1 + 0);
170+
b[ 0] = data01;
171+
b += 1;
172+
ao1 += lda;
173+
} else {
164174
#ifdef UNIT
165-
if (X > posY) {
166-
#endif
167-
b[ 0] = *(ao1 + 0);
168-
#ifdef UNIT
169-
} else {
170-
b[ 0] = ONE;
171-
}
175+
b[ 0] = ONE;
176+
#else
177+
data01 = *(ao1 + 0);
178+
b[ 0] = data01;
172179
#endif
173-
b ++;
174-
ao1 += lda;
175-
X ++;
180+
b += 1;
181+
ao1 += lda;
182+
}
183+
184+
X += 1;
176185
i --;
177186
} while (i > 0);
178187
}

kernel/generic/trmm_utcopy_4.c

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -201,18 +201,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
201201
if (X < posY) {
202202

203203
if (m & 2) {
204-
/* ao1 += 2;
204+
ao1 += 2;
205205
ao2 += 2;
206206
ao3 += 2;
207-
ao4 += 2; */
207+
ao4 += 2;
208208
b += 8;
209209
}
210210

211211
if (m & 1) {
212-
/* ao1 += 1;
212+
ao1 += 1;
213213
ao2 += 1;
214214
ao3 += 1;
215-
ao4 += 1; */
215+
ao4 += 1;
216216
b += 4;
217217
}
218218

@@ -238,7 +238,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
238238
b[ 7] = data08;
239239

240240
ao1 += 2 * lda;
241-
// ao2 += 2 * lda;
241+
ao2 += 2 * lda;
242242
b += 8;
243243
}
244244

@@ -253,7 +253,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
253253
b[ 2] = data03;
254254
b[ 3] = data04;
255255

256-
// ao1 += lda;
256+
ao1 += lda;
257257
b += 4;
258258
}
259259

@@ -401,7 +401,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
401401
if (i) {
402402

403403
if (X < posY) {
404-
// ao1 += 2;
404+
ao1 += 2;
405405
b += 2;
406406
} else
407407
if (X > posY) {
@@ -411,7 +411,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
411411
b[ 0] = data01;
412412
b[ 1] = data02;
413413

414-
// ao1 += lda;
414+
ao1 += lda;
415415
b += 2;
416416
} else {
417417
#ifdef UNIT
@@ -443,21 +443,26 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
443443
do {
444444

445445
if (X < posY) {
446+
b += 1;
446447
ao1 += 1;
447-
} else {
448-
#ifdef UNIT
448+
} else
449449
if (X > posY) {
450-
#endif
451-
b[ 0] = *(ao1 + 0);
452-
#ifdef UNIT
450+
data01 = *(ao1 + 0);
451+
b[ 0] = data01;
452+
ao1 += lda;
453+
b += 1;
453454
} else {
455+
#ifdef UNIT
454456
b[ 0] = ONE;
455-
}
457+
#else
458+
data01 = *(ao1 + 0);
459+
b[ 0] = data01;
456460
#endif
457-
ao1 += lda;
458-
}
459-
b ++;
460-
X ++;
461+
ao1 += lda;
462+
b += 1;
463+
}
464+
465+
X += 1;
461466
i --;
462467
} while (i > 0);
463468
}

kernel/generic/trsm_ltcopy_4.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
206206
}
207207

208208
a1 += 2 * lda;
209-
// a2 += 2 * lda;
209+
a2 += 2 * lda;
210210
b += 8;
211211

212212
ii += 2;

0 commit comments

Comments
 (0)